1 WRANGLING

Initial pilot data were collected via Qualtrics surveys named MAG_S2_PROLIFIC-DATACOLLAR_0 and MAG_S2_PROLIFIC-DATACOLLAR_0, each presenting participants with one common graph (“block 0”, STIMULUS = B0-0) and four subsequent graphs in random order (“block 1”).

Subsequently we made minor alterations to the question wording and respose wording, and the remaining stimuli were organized in 6 blocks. Each block was collected via PROLIFIC using independent Qualtrics surveys, with the exception of a sample of respondents directly recruited from Tumblr, using a single Qualtrics surveys with randomization logic to assign each Tumblr participant to one of the six stimulus blocks.

Participants were excluded from the sample for the following reasons

  • abandoned; Finished = False and Progress < 100%
  • fails consent or pre-screening, Finished = TRUE, Progress = 100%, Q_TerminateFlag = “Screened”
  • didnot-follow-instructions free responses indicate they misinterpred the question (most commonly they describe the graph rather than their reactions/judgements/impressions of it)
  • illegible-english free responses text is largely illegible

Following wrangling, there should be 6 blocks of stimuli, each containing responses from 40 participants from prolific, and TODO responses directly from Tumblr.

1.0.1 Import Configuration Files

############## IMPORT STIMULI FILE
df_stimuli <- read_csv("data/input/stimuli.csv", col_names = TRUE) %>% 
  mutate(
    BLOCK = as.factor(BLOCK), 
    STIMULUS_CATEGORY = as.factor(CATEGORY),
    ID = as.factor(ID),
    MAKER_ID = as.factor(MAKER_ID)
  )

############## BUILD LABELS
ref_stimuli <- levels(df_stimuli$ID)
ref_cat_questions <- c("MAKER_ID","MAKER_AGE","MAKER_GENDER")
ref_free_response <- c("MAKER_DETAIL", "MAKER_EXPLAIN", "TOOL_DETAIL", "CHART_EXPLAIN")
ref_conf_questions <- c("MAKER_CONF", "AGE_CONF", "GENDER_CONF", "TOOL_CONF")
ref_sd_questions <- c("MAKER_DESIGN","MAKER_DATA","MAKER_POLITIC",
               "MAKER_ARGUE","MAKER_SELF","MAKER_ALIGN","MAKER_TRUST",
               "CHART_LIKE", "CHART_BEAUTY", "CHART_INTENT", "CHART_TRUST")
left <- c("professional","professional","left-leaning","confrontational",
          "altruistic","does NOT share","untrustworthy",
          "NOT at all","NOT at all", "inform", "untrustworthy")
right <- c("layperson","layperson","right-leaning","diplomatic",
           "selfish", "DOES share", "trustworthy",
           "very much", "very much", "persuade", "trusthworthy")
ref_labels <- as.data.frame(cbind(left,right))
rownames(ref_labels) <- ref_sd_questions
ref_blocks <- c("block1", "block2", "block3", "block4", "block5", "block6")
rm(left,right)

############## STUDY ID FILE
#most blocks were run as separate qualtrics surveys with diffferent recruitments in Prolific
#Tumblr was run with all blocks and randomization
df_studies <- read_csv("data/input/studies.csv", col_names = TRUE) %>%
  mutate(
    ID.Study = as.factor(ID.Study),
    Assigned.Block = as.factor(Assigned.Block),
    Distribution = as.factor(Distribution),
    Prolific.Name = as.factor(Prolific.Name),
    Qualtrics.URL = as.factor(Qualtrics.URL),
    Qualtrics.Survey = as.factor(Qualtrics.Survey),
    Sample = as.factor(Sample),
    Scope = as.factor(Scope)
)

1.0.2 Import Data Files

#1. IMPORT RAW DATA FILES ########################################################
#### RAW DATA ####################################################################
# will always be the unaltered version of imported data
# 1 row per subject 
df_raw_datacollar <- read_csv("data/input/CLEAN_MAG_S2_PROLIFIC_datacollarpilot_B1.csv", col_names = TRUE)
df_raw_bluecollar <- read_csv("data/input/CLEAN_MAG_S2_PROLIFIC_bluecollarpilot_B1.csv", col_names = TRUE)
df_raw_b1 <- read_csv("data/input/CLEAN_MAG_S2_PROLIFIC_GENERAL_B1.csv", col_names = TRUE)
df_raw_b2 <- read_csv("data/input/CLEAN_MAG_S2_PROLIFIC_GENERAL_B2.csv", col_names = TRUE)
df_raw_b3 <- read_csv("data/input/CLEAN_MAG_S2_PROLIFIC_GENERAL_B3.csv", col_names = TRUE)
df_raw_b4 <- read_csv("data/input/CLEAN_MAG_S2_PROLIFIC_GENERAL_B4.csv", col_names = TRUE)
df_raw_b5 <- read_csv("data/input/CLEAN_MAG_S2_PROLIFIC_GENERAL_B5.csv", col_names = TRUE)
df_raw_b6 <- read_csv("data/input/CLEAN_MAG_S2_PROLIFIC_GENERAL_B6.csv", col_names = TRUE)
df_raw_tumblr_paid <- read_csv("data/input/CLEAN_MAG_S2_TUMBLR_PAID_ALL.csv", col_names = TRUE)
df_raw_tumblr_free <- read_csv("data/input/CLEAN_MAG_S2_TUMBLR_FREE_ALL.csv", col_names = TRUE)

# drop first two rows (qualtrics_specs)
df_raw_datacollar <- df_raw_datacollar[-c(1:2),]
df_raw_bluecollar <- df_raw_bluecollar[-c(1:2),]
df_raw_b1 <- df_raw_b1[-c(1:2),]
df_raw_b2 <- df_raw_b2[-c(1:2),]
df_raw_b3 <- df_raw_b3[-c(1:2),]
df_raw_b4 <- df_raw_b4[-c(1:2),]
df_raw_b5 <- df_raw_b5[-c(1:2),]
df_raw_b6 <- df_raw_b6[-c(1:2),]
df_raw_tumblr_paid <- df_raw_tumblr_paid[-c(1:2),]
df_raw_tumblr_free <- df_raw_tumblr_free[-c(1:2),]

# ADD DUMMY COLS TO DATA/BLUE COLLAR PILOT DATA 
# necessary b/c pilot (block 1) did not have chart_action behavioural question
# x <- compare_df_cols(df_raw_b2, df_raw_pilot)
df_raw_pilot <- rbind(df_raw_datacollar, df_raw_bluecollar) %>% 
  mutate(
    '0_Q_B0_CHART_ACTION' = NA,
    '1_Q_B1_CHART_ACTION' = NA,
    '1_Q_B2_CHART_ACTION' = NA,
    '1_Q_B3_CHART_ACTION' = NA,
    '1_Q_B4_CHART_ACTION' = NA,
    '1_Q_B5_CHART_ACTION' = NA,
    '1_Q_B6_CHART_ACTION' = NA,
    '2_Q_B1_CHART_ACTION' = NA,
    '2_Q_B2_CHART_ACTION' = NA,
    '2_Q_B3_CHART_ACTION' = NA,
    '2_Q_B4_CHART_ACTION' = NA,
    '2_Q_B5_CHART_ACTION' = NA,
    '2_Q_B6_CHART_ACTION' = NA,
    '3_Q_B1_CHART_ACTION' = NA,
    '3_Q_B2_CHART_ACTION' = NA,
    '3_Q_B3_CHART_ACTION' = NA,
    '3_Q_B4_CHART_ACTION' = NA,
    '3_Q_B5_CHART_ACTION' = NA,
    '3_Q_B6_CHART_ACTION' = NA,
    '4_Q_B1_CHART_ACTION' = NA,
    '4_Q_B2_CHART_ACTION' = NA,
    '4_Q_B3_CHART_ACTION' = NA,
    '4_Q_B4_CHART_ACTION' = NA,
    '4_Q_B5_CHART_ACTION' = NA,
    '4_Q_B6_CHART_ACTION' = NA
  )

# BIND COLUMNS RAW prolific datasets 
df_raw_prolific <- rbind(df_raw_pilot, df_raw_b1, df_raw_b2, df_raw_b3, df_raw_b4, df_raw_b5, df_raw_b6)

# RETROFIT SOME COLNAMES FOR COMPATIBILITY WITH TUMBLR 
df_raw_prolific <- df_raw_prolific %>% 
  #drop T_BROWSER cols [these are blank]
  select(-contains("T_BROWSER"), -T_EMAIL) %>% 
  mutate(
    Q_RelevantIDDuplicate = NA, 
    Q_RelevantIDDuplicateScore = NA, 
    Q_RelevantIDFraudScore = NA, 
    Q_RelevantIDLastStartDate = NA, 
    RANDOM_BLOCK = NA
  ) %>% rename_with(
    stringr::str_replace, 
      pattern = "P_BROWSER", replacement = "BROWSER",
  ) %>% 
  rename(
    BROWSER_OS = `BROWSER_Operating System`
  ) %>% mutate_all(funs(str_replace(., "millenial", "millennial")))


# BIND COLUMNS RAW prolific datasets 
df_raw_tumblr <- rbind(df_raw_tumblr_paid, df_raw_tumblr_free)

# RETROFIT SOME TUMBLR COLNAMES FOR MERGING WITH PROLIFIC 
df_raw_tumblr <- df_raw_tumblr %>% 
  select( - t_example_question1, -t_example_question2, -D_email) %>% 
  mutate(
    #add empty browser cols; didn't collect these in tumblr qualtrics
    BROWSER_Browser = NA,
    BROWSER_OS = NA,
    BROWSER_Resolution = NA,
    BROWSER_Version = NA, 
    ID_PROLIFIC = "TUMBLR",
    ID_SESSION = "TUMBLR",
    ID_STUDY = "TUMBLR"
  ) %>% mutate_all(funs(str_replace(., "millenial", "millennial")))

#dataframe column comparisons 
# x <- janitor::compare_df_cols(df_raw_prolific, df_raw_tumblr)

# JOINT PROLIFIC AND TUMBLR DATAFRAMES
df_raw <- rbind(df_raw_prolific, df_raw_tumblr)

df_raw <- df_raw %>% 
  #reorder 
  select( RANDOM_BLOCK, 
          End_State,
          StartDate:randomize_common, 
          ID_PROLIFIC:ID_SESSION, 
          D_gender:FEEDBACK, 
          PROLIFIC_PID: FL_14_DO,
          Q_RelevantIDDuplicate: Q_RelevantIDLastStartDate, 
          `0_Q_B0_ENCOUNTER`: `0_Q_B0_CHART_LATENCY_Click Count`, 
          `0_Q_B0_CHART_ACTION`, 
          `1_Q_B1_loop-number` : `1_Q_B1_CHART_LATENCY_Click Count`,
          `1_Q_B1_CHART_ACTION`, 
          `2_Q_B1_loop-number` : `2_Q_B1_CHART_LATENCY_Click Count`,
          `2_Q_B1_CHART_ACTION`, 
          `3_Q_B1_loop-number` : `3_Q_B1_CHART_LATENCY_Click Count`,
          `3_Q_B1_CHART_ACTION`, 
          `4_Q_B1_loop-number` : `4_Q_B1_CHART_LATENCY_Click Count`,
          `4_Q_B1_CHART_ACTION`, 
          `1_Q_B2_loop-number` : `1_Q_B2_CHART_LATENCY_Click Count`,
          `1_Q_B2_CHART_ACTION`, 
          `2_Q_B2_loop-number` : `2_Q_B2_CHART_LATENCY_Click Count`,
          `2_Q_B2_CHART_ACTION`, 
          `3_Q_B2_loop-number` : `3_Q_B2_CHART_LATENCY_Click Count`,
          `3_Q_B2_CHART_ACTION`, 
          `4_Q_B2_loop-number` : `4_Q_B2_CHART_LATENCY_Click Count`,
          `4_Q_B2_CHART_ACTION`, 
          `1_Q_B3_loop-number` : `1_Q_B3_CHART_LATENCY_Click Count`,
          `1_Q_B3_CHART_ACTION`, 
          `2_Q_B3_loop-number` : `2_Q_B3_CHART_LATENCY_Click Count`,
          `2_Q_B3_CHART_ACTION`, 
          `3_Q_B3_loop-number` : `3_Q_B3_CHART_LATENCY_Click Count`,
          `3_Q_B3_CHART_ACTION`, 
          `4_Q_B3_loop-number` : `4_Q_B3_CHART_LATENCY_Click Count`,
          `4_Q_B3_CHART_ACTION`, 
          `1_Q_B4_loop-number` : `1_Q_B4_CHART_LATENCY_Click Count`,
          `1_Q_B4_CHART_ACTION`, 
          `2_Q_B4_loop-number` : `2_Q_B4_CHART_LATENCY_Click Count`,
          `2_Q_B4_CHART_ACTION`, 
          `3_Q_B4_loop-number` : `3_Q_B4_CHART_LATENCY_Click Count`,
          `3_Q_B4_CHART_ACTION`, 
          `4_Q_B4_loop-number` : `4_Q_B4_CHART_LATENCY_Click Count`,
          `4_Q_B4_CHART_ACTION`, 
          `1_Q_B5_loop-number` : `1_Q_B5_CHART_LATENCY_Click Count`,
          `1_Q_B5_CHART_ACTION`, 
          `2_Q_B5_loop-number` : `2_Q_B5_CHART_LATENCY_Click Count`,
          `2_Q_B5_CHART_ACTION`, 
          `3_Q_B5_loop-number` : `3_Q_B5_CHART_LATENCY_Click Count`,
          `3_Q_B5_CHART_ACTION`, 
          `4_Q_B5_loop-number` : `4_Q_B5_CHART_LATENCY_Click Count`,
          `4_Q_B5_CHART_ACTION`, 
          `1_Q_B6_loop-number` : `1_Q_B6_CHART_LATENCY_Click Count`,
          `1_Q_B6_CHART_ACTION`, 
          `2_Q_B6_loop-number` : `2_Q_B6_CHART_LATENCY_Click Count`,
          `2_Q_B6_CHART_ACTION`, 
          `3_Q_B6_loop-number` : `3_Q_B6_CHART_LATENCY_Click Count`,
          `3_Q_B6_CHART_ACTION`, 
          `4_Q_B6_loop-number` : `4_Q_B6_CHART_LATENCY_Click Count`,
          `4_Q_B6_CHART_ACTION`
          #df_raw 937 vars           
  ) 



#DROP WIP DATAFRAMES 
rm(df_raw_datacollar, df_raw_bluecollar, df_raw_b1, df_raw_b2, df_raw_b3, df_raw_b4, df_raw_b5, df_raw_b6, df_raw_pilot, df_raw_prolific, df_raw_tumblr, df_raw_tumblr_free, df_raw_tumblr_paid)

1.0.3 Clean Data

#2. CLEAN MASTER PARTICIPANT-LEVEL DF  #########################################################
################################################################################################
#### MASTER WIDE FORMAT DATA FRAME [1 row / qualtrics submission] ################

df_data <- df_raw %>% 
  select(
    -EndDate, -IPAddress, -RecordedDate,
    -RecipientLastName, -RecipientFirstName, -RecipientEmail,
    -ExternalReference, -LocationLatitude, -LocationLongitude, 
    -DistributionChannel, -UserLanguage, -Q_RecaptchaScore,
    -BROWSER_Version, -BROWSER_Resolution, 
    -CONSENT, -ELIGIBILITY, 
    -randomize_common,
    #hidden q that controls common stimulus url
    -stimulus_common, 
    #not actually randomization order
    -FL_14_DO, 
    -contains("First Click"), -contains("Last Click"), -contains("Click Count"),
    -D_politicalParty_DO, 
    -ID_PROLIFIC, -ID_STUDY, -ID_SESSION #redundant to other cols 
  ) %>% 
  rename(
    duration.sec = `Duration (in seconds)`,
    EndState = End_State, 
    TerminateFlag = Q_TerminateFlag,
    Source = Status, #where the survey originated from (should not be preview or test)
    PLATFORM = Q_PLATFORM,
    ID.Qualtrics = ResponseId,
    ID.Prolific = PROLIFIC_PID,
    ID.Study = STUDY_ID,
    ID.Session = SESSION_ID,
    # P_BROWSER_OS = `P_BROWSER_Operating System`,
    # T_BROWSER_OS = `T_BROWSER_Operating System`,
    SCREEN_workFunction_TEXT = SCREEN_workFunction_22_TEXT, 
    SCREEN_socialMedia_TEXT = SCREEN_socialMedia_18_TEXT,
    D_politicalParty_OTHER = D_politicalParty_4_TEXT,
    D_politicsSocial = D_politicsSocial_1,
    D_politicsFiscal = D_politicsFiscal_2
 ) %>% 
  mutate(
    #SET FACTORS 
    D_politicsSocial = as.numeric(D_politicsSocial),
    D_politicsFiscal = as.numeric(D_politicsFiscal),
    ID.Study = factor(ID.Study),
    ID.Qualtrics = factor(ID.Qualtrics),
    ID.Prolific = factor(ID.Prolific),
    ID.Session = factor(ID.Session),
    PLATFORM = factor(PLATFORM),
    Source = factor(Source),
    Finished = as.logical(Finished),
    TerminateFlag = factor(TerminateFlag),
    EndState = factor(EndState), 
    D_gender = factor(D_gender), 
    D_gender_collapsed = fct_collapse(D_gender, 
                                female = "Female", 
                                male = "Male", 
                                other = c("Non-binary / third gender", "Prefer not to say", "Prefer to self-describe")),
    D_age = factor(D_age), 
    D_income = factor(D_income, 
                      levels = c(
                        "Prefer not to say",
                        "Less than $25,000",
                        "$25,000-$49,999"  ,
                        "$50,000-$74,999"  ,
                        "$75,000-$99,999"  ,
                        "$100,000-$149,999",
                        "$150,000 or more" 
                      )),
    D_employmentStatus = factor(D_employmentStatus),
    duration.sec = as.numeric(duration.sec), #weird booleans should only be for the test generator
    duration.min = round(duration.sec/60,2),
    Progress = as.numeric(Progress), 
    D_education = forcats::fct_na_value_to_level( D_education, level="NA"),
    D_education = forcats::fct_collapse( D_education,
                                no_data = "NA",
                                less_high_school = c("Some high school or less"),
                                high_school = c("High school diploma or GED"),
                                some_college = c("Some college, but no degree", "Some college, no degree"),
                                associates =c( "Associates or technical degree"),
                                undergrad = c("Bachelor’s degree","Bachelor's degree"),
                                grad = c("Graduate or professional degree (MA, MS, MBA, PhD, JD, MD, DDS etc.)",
                                         "Graduate or professional degree (MA, MS, MBA, PhD, JD, MD, DDS, etc)")
    ),
    D_education = factor(D_education, 
                                       levels = c("no_data", "less_high_school","high_school", "some_college",
                                         "associates", "undergrad", "grad"), 
                                       labels = c("NA", "some high school or less","high school diploma or GED ", 
                                                  "some college", "associates or technical degree", 
                                                  "undergradudate degree", "graduate or professional degree"), 
                                       ),
    D_politicalParty = factor(D_politicalParty, levels = c("No preference", "Other", "Independent", "Republican", "Democrat")),
    D_age = factor(D_age, 
                   levels = c("18-24 years old" ,
                              "25-34 years old" ,
                              "35-44 years old" ,
                              "45-54 years old" ,
                              "55-64 years old" ,
                              "65+ years old"   ), 
                   labels = c("18-24", "25-34","35-44","45-54","55-64","65+ years"))
  ) %>%
  #REPLACE RANDOM TRAILING _1 AND _65 FROM QUALTRICS
  rename_with( .cols = contains('_65'), .fn = ~str_replace(.,  pattern = '_65', replacement = '')) %>% 
  rename_with( .cols = contains('_1'), .fn = ~str_replace(.,  pattern = '_1', replacement = '')) %>% 
  #RM _PAGE Submit from LATENCY
  rename_with( .cols = contains('_Page Submit'), .fn = ~str_replace(.,  pattern = '_Page Submit', replacement = '')) %>% 
  #CHANGE DETAIL QUESTION DELIMITER FOR PIVOT PURPOSES 
  rename_with( .cols = contains('_CHART_'), .fn = ~str_replace(.,  pattern = '_CHART_', replacement = '_CHART-')) %>% 
  #CHANGE DETAIL QUESTION DELIMITER FOR PIVOT PURPOSES 
  rename_with( .cols = contains('_MAKER_'), .fn = ~str_replace(.,  pattern = '_MAKER_', replacement = '_MAKER-')) %>% 
  #CHANGE DETAIL QUESTION DELIMITER FOR PIVOT PURPOSES 
  rename_with( .cols = contains('_AGE_'), .fn = ~str_replace(.,  pattern = '_AGE_', replacement = '_AGE-')) %>% 
  #CHANGE DETAIL QUESTION DELIMITER FOR PIVOT PURPOSES 
  rename_with (.cols = contains('_GENDER_'), .fn = ~str_replace(., pattern = '_GENDER_', replacement = '_GENDER-')) %>% 
  #CHANGE DETAIL QUESTION DELIMITER FOR PIVOT PURPOSES 
  rename_with (.cols = contains('_TOOL_'), .fn = ~str_replace(., pattern = '_TOOL_', replacement = '_TOOL-')) %>% 
  select(
    #reordering
    RANDOM_BLOCK:Progress, 
    Finished, EndState, TerminateFlag, 
    Q_RelevantIDDuplicate:Q_RelevantIDLastStartDate,
    ID.Qualtrics, 
    ID.Prolific : ID.Session, 
    duration.sec, duration.min,
    BROWSER_Browser : BROWSER_OS, 
    D_gender_collapsed, D_gender:D_politicsFiscal,
    SCREEN_workMethod: SCREEN_socialMedia_TEXT, 
    PURPOSE, FEEDBACK, PLATFORM,
    `0_Q_B0_ENCOUNTER`: `4_Q_B6_CHART-ACTION`
  ) 

##### JOIN STUDY-LEVEL DATA 
df_data <- dplyr::left_join(df_data, df_studies, by="ID.Study") 

#SET IDS AND ASSIGNED BLOCK TO HANDLE PROLIFIC AND TUMBLR 
df_data <- df_data %>% 
  mutate(
    # SET ASSIGNMENT BLOCK FOR TUMBLR
    Assigned.Block = if_else( (Distribution =="TUMBLR"), RANDOM_BLOCK, Assigned.Block),
    Assigned.Block = factor(Assigned.Block), 
    # PID = if_else( (Distribution =="TUMBLR"), ID.Qualtrics, ID.Prolific),
    PID = factor(ID.Qualtrics)
  ) %>% #DROP RANDOM_BLOC COLUMNS
  select (-RANDOM_BLOCK) %>% 
  select(
    #REORDER
    PID, 
    Distribution, 
    Assigned.Block,
    ID.Qualtrics:ID.Session,
    EndState,
    StartDate: Q_RelevantIDLastStartDate, 
    Prolific.Name, Qualtrics.Survey,  Qualtrics.URL, Description, Sample, Scope, 
    duration.sec:PLATFORM,
    `0_Q_B0_ENCOUNTER` : `4_Q_B6_CHART-ACTION`
  )


#2B CLEAN MASTER PARTICIPANT-LEVEL DF  #########################################
#### SEGREGATE PARTICIPANTS WHO DID NOT COMPLETE ###############################
## [1 row / qualtrics submission] ################
## NOTE it is common for prolific participants to fail the 
## screening verification, and then try again but change their 
## screening verification answers (ie. one prolific ID for multiple qualrics IDs)
df_exclude <- df_data %>% 
  filter( 
    !is.na(TerminateFlag) | Finished == FALSE | EndState != "COMPLETE"  
  ) %>% 
  select(
    PID, Distribution, ID.Qualtrics, ID.Prolific, ID.Study, Assigned.Block, Scope, Source, Progress, 
    Finished, TerminateFlag, EndState,StartDate, duration.min, 
    D_gender:D_politicsFiscal, SCREEN_workMethod:FEEDBACK, Prolific.Name:Scope
  )  %>% 
  mutate(
    EndState = if_else( (Progress < 100), "abandoned", EndState),
    EndState = if_else( (str_detect(EndState,"screen")), "screened", EndState),
    EndState = if_else( (TerminateFlag == "Screened" & is.na(EndState)), "screened", EndState),
    EndState = factor(EndState)
  )

#### MASTER VALID DATA [WIDE] 1 row per qualtrics entry #########################
## [1 row / qualtrics submission] ################
df_data <- df_data %>% filter(
  PID %nin% df_exclude$PID
  # (Finished == TRUE ) & is.na(TerminateFlag)
) %>% mutate(
  EndState = droplevels(EndState),
  ID.Prolific = droplevels(ID.Prolific),
  ID.Qualtrics = droplevels(ID.Qualtrics), 
  PID = droplevels((PID))
  
)

#sanity check === SHOULD BE O
#no qualtrics surveys in good data that weren't finished
print("Number of PID entries in df_data AND nofinish/excluded? [should be 0]")
## [1] "Number of PID entries in df_data AND nofinish/excluded? [should be 0]"
sum(df_data$PID %in% df_exclude$PID)
## [1] 0
#sanity check === SHOULD BE O
#no duplicated PROLIFIC ids in good data 
print("Number of PIDs duplicated in df_data print [should be 0]")
## [1] "Number of PIDs duplicated in df_data print [should be 0]"
sum(duplicated(df_data$PID))
## [1] 0
## save participant-level data file for qda validation 
## note that this DOES contain pilot data 
## this does NOT contain excluded participants 
write.csv(df_data, file = "data/output/df_participants.csv", na="")


#3 CREATE TRIAL LEVEL DFS FOR QDA ###############################################
#### CHART LEVEL DATA FRAME (LONG) FOR QDA (incl demographics) ##################
#### INCLUDES PILOT DATA FROM DATACOLLAR BLUECOLLAR PROLIFIC RECRUITMENT ########
# 1 ROW / participant X GRAPH including demographics 
# UNRAVEL TO QUESTIONS
df_qda_input <- df_data %>% 
  # select(
  # ID.Qualtrics:ID.Study, PLATFORM,
  # contains("_Q_"), contains("loop")
# ) %>% 
  pivot_longer( #PIVOT ON stimulus
  cols = contains("_Q_"),
  names_to = c("stimulus","dummy","BLOCK","QUESTION"),
  values_to = c("value"),
  names_sep = "_"
) %>% select(-dummy) %>% 
  unite(
   BLOCK:stimulus, col="STIMULUS", sep="-", remove=FALSE
) %>% 
  mutate(
    BLOCK = factor(BLOCK),
    STIMULUS = factor(STIMULUS),
    QUESTION = str_replace_all(QUESTION,"-","_"),
    QUESTION = factor(QUESTION),
    STIMULUS_CATEGORY = str_remove(STIMULUS,"B.-"),
    STIMULUS_CATEGORY = factor(STIMULUS_CATEGORY,
                  levels=c("0","4","3","2","1"),
                  labels= c("F","D","C","B","A"))
) %>% 
  select(-stimulus) %>% 
# RE-RAVEL UP TO STIMULI
  filter(!is.na(value)) %>% 
   pivot_wider(
    names_from = QUESTION,
    values_from = value 
  ) %>%  
  tidyr::unnest() %>%  # handle r coerces values to lists
  mutate(
    across(contains("MAKER_ID") | contains("MAKER_GENDER") | contains("MAKER_AGE"), factor),
    across(contains("_CONF") | contains("_LATENCY"), as.numeric),
    across(MAKER_DESIGN:MAKER_TRUST, as.numeric),
    across(CHART_LIKE:CHART_TRUST, as.numeric),
    ENCOUNTER = factor(ENCOUNTER),
    # loop_number = as.numeric(loop_number),
    # loop_number = ifelse(is.na(loop_number), 0, loop_number),
    MAKER_LATENCY = round(MAKER_LATENCY/60,2), #CHANGE TO MINS
    CHART_LATENCY = round(CHART_LATENCY/60,2) #CHANGE TO MINS
  )
## Warning: `cols` is now required when using `unnest()`.
## ℹ Please use `cols = c()`.
#WRITE A CSV FILE AS THE BASIS FOR THE QUALITATIVE DATA ANALYSIS
write.csv(df_qda_input, file = "data/output/df_qda_input.csv", na="")


#### REMOVE PILOT DATA FROM DF_DATA 
## pilot data IS not included QDA file, but not in quant analysis 
df_data <- df_data %>% 
  filter(Scope != "pilot") %>% 
  mutate(
    ID.Study = droplevels(ID.Study), 
    ID.Prolific = droplevels(ID.Prolific),
    ID.Qualtrics = droplevels(ID.Qualtrics),
    PID = droplevels(PID)
  ) 

########## CHECK BLOCK COUNTS 
# title = "Participants by Condition and Data Collection Modality"
# cols = c("Control Condition","Impasse Condition","Total for Period")


title = "Number of abandoned/screened/rejected attempts "
cols = c("Block","pilot","study2","sum")
cont <- table(df_exclude$Assigned.Block, df_exclude$Scope)
cont %>% addmargins() %>% kbl(caption = title, col.names = cols) %>%  kable_classic()
Number of abandoned/screened/rejected attempts
Block pilot study2 sum
1 107 31 138
2 0 32 32
3 0 36 36
4 0 28 28
5 0 18 18
6 0 25 25
Sum 107 170 277
title = "Number of abandoned/screened/rejected attempts by type"
cols = c("Rejection Type", "Sum")
cont <- table(df_exclude$EndState)
cont %>% addmargins() %>% kbl(caption = title, col.names = cols) %>%  kable_classic()
Number of abandoned/screened/rejected attempts by type
Rejection Type Sum
abandoned 255
didnot-follow-instructions 25
failed_nonconsent 5
illegible-english 2
low-effort 13
low-quality 1
screened 52
Sum 353
title = "Number of successful surveys"
cols = c("Assigned.Block", "Sum")
cont <- table(df_data$Assigned.Block)
cont %>% addmargins() %>% kbl(caption = title, col.names = cols) %>%  kable_classic()
Number of successful surveys
Assigned.Block Sum
1 55
2 52
3 52
4 54
5 53
6 52
Sum 318
title = "Number of successful surveys by distribution"
cols = c("Sampling Platform", "Sum")
cont <- table(df_data$Distribution)
cont %>% addmargins() %>% kbl(caption = title, col.names = cols) %>%  kable_classic()
Number of successful surveys by distribution
Sampling Platform Sum
PROLIFIC 240
TUMBLR 78
Sum 318
title = "Number of successful surveys by distribution and block"
cols = c("Sampling Platform", "Block-1","Block-2","Block-3","Block-4","Block-5","Block-6", "Sum")
cont <- table(df_data$Distribution, df_data$Assigned.Block)
cont %>% addmargins() %>% kbl(caption = title, col.names = cols) %>%  kable_classic()
Number of successful surveys by distribution and block
Sampling Platform Block-1 Block-2 Block-3 Block-4 Block-5 Block-6 Sum
PROLIFIC 40 40 40 40 40 40 240
TUMBLR 15 12 12 14 13 12 78
Sum 55 52 52 54 53 52 318
##### CREATE PARTICIPANT LEVEL SIMPLFIFLIED DATAFRAME 
df_participants <- df_data %>% 
  select(
    PID:Assigned.Block, 
    EndState, Sample, Scope, 
    duration.sec, duration.min, 
    contains("D_"), 
    contains("SCREEN_")
  )

# END WRANGLE MASTER WIDE PARTICIPANT LEVEL DATA FRAME 
################################################################################################
################################################################################################

print("df_data represents full set of valid complete participants [wide]")
## [1] "df_data represents full set of valid complete participants [wide]"
print("df_participants represents full set of valid complete participants [simplified]")
## [1] "df_participants represents full set of valid complete participants [simplified]"

1.0.4 Wrangle Question and Trial Level

#4 CREATE QUESTION LEVEL DFS ###################################################################
#### QUESTION LEVEL DATA FRAME (LONG) ##########################
# unravel ALL the way down to questions 
# 1 row per participant-graph-question
df_questions <- df_data %>% 
  select(
  PID, duration.min,  Assigned.Block,
  Sample, Scope, Distribution, PLATFORM, 
  D_gender_collapsed:D_politicsFiscal, 
  contains("_Q_"), contains("loop"), 
) %>% 
  pivot_longer( #PIVOT ON stimulus
  cols = contains("_Q_"),
  names_to = c("stimulus","dummy","BLOCK","QUESTION"),
  values_to = c("value"),
  names_sep = "_"
) %>% select(-dummy) %>% 
  unite(
   BLOCK:stimulus, col="STIMULUS", sep="-", remove=FALSE
) %>% 
  mutate(
    BLOCK = factor(BLOCK),
    STIMULUS = factor(STIMULUS),
    QUESTION = str_replace_all(QUESTION,"-","_"),
    QUESTION = factor(QUESTION, 
                      levels = c(
                        "ENCOUNTER",    
                        "MAKER_ID",        
                        "MAKER_DETAIL", 
                        "MAKER_CONF",
                        "MAKER_AGE", 
                        "AGE_CONF",
                        "MAKER_GENDER",
                        "GENDER_CONF",
                        "MAKER_DESIGN",  
                        "MAKER_DATA",    
                        "MAKER_POLITIC",
                        "MAKER_ARGUE",  
                        "MAKER_SELF",   
                        "MAKER_ALIGN",    
                        "MAKER_TRUST", 
                        "MAKER_EXPLAIN",
                        "MAKER_LATENCY", 
                        "TOOL_ID",     
                        "TOOL_CONF",     
                        "TOOL_DETAIL",   
                        "CHART_LIKE",    
                        "CHART_BEAUTY",  
                        "CHART_INTENT", 
                        "CHART_TRUST",   
                        "CHART_TYPE",    
                        "CHART_ACTION",  
                        "CHART_EXPLAIN", 
                        "CHART_LATENCY", 
                        "loop_number" )),
    STIMULUS_CATEGORY = str_remove(STIMULUS,"B.-"),
    STIMULUS_CATEGORY = factor(STIMULUS_CATEGORY,
                  levels=c("0","4","3","2","1"),
                  labels= c("F","D","C","B","A"))
) %>% 
  select(-stimulus) %>% filter(!is.na(value))


#### SD QUESTION LEVEL DATA FRAME (wide-stim) ##########################
# ravel up one level from questions
# 1 row per participant-question with all blocks as cols for SD qs
df_sd_questions_wide <- df_questions %>%
  select(-BLOCK,-STIMULUS_CATEGORY) %>% #drop block in order to work at stimulus level
  filter(QUESTION %in% ref_sd_questions) %>% 
  pivot_wider(
    names_from = STIMULUS,
    values_from = value 
  ) %>%  
  tidyr::unnest() %>%  # handle r coerces values to lists
  mutate(
    across(contains("-") , as.numeric),
    QUESTION = droplevels(QUESTION)
  )
## Warning: `cols` is now required when using `unnest()`.
## ℹ Please use `cols = c()`.
### SD QUESTIONS LEVEL DATA FRAME (LONG) #####################
df_sd_questions_long <- df_questions %>% 
  filter(QUESTION %in% ref_sd_questions) %>% 
  mutate(
    QUESTION = droplevels(QUESTION),
    value = as.numeric(value)
  )


#5 SPECIAL DFS FOR MULTISELECT QS  #####################################################
#### MULTI-SELECT QUESTIONS  ##########################

## DF_TOOLS 
# 1 ROW per PARTICIPANT X STIMULUS X tool_id selection 
# tool_id is a multiselect field 
df_tools <- df_questions %>% 
  # select(ID.Prolific, QUESTION, value) %>%
  filter(QUESTION  %in% c("TOOL_ID", "TOOL_CONF")) %>% 
  mutate(QUESTION = fct_drop(QUESTION)) %>% 
  pivot_wider(
    names_from = QUESTION,
    values_from = value 
  ) %>% 
  separate_longer_delim(
    cols = TOOL_ID,
    delim = ","
  ) %>% 
  mutate(TOOL_ID = factor(TOOL_ID, 
                             levels = c("?", "design_basic","design_advanced", "viz_basic", "viz_advanced", "programming")),
         PID = droplevels(PID),
         TOOL_CONF = as.numeric(TOOL_CONF)
         ) 

## DF_ACTIONS 
# 1 ROW per PARTICIPANT X STIMULUS X chart_action selection 
# chart_action is a multiselect field 
df_actions <- df_questions %>% 
  filter(QUESTION == "CHART_ACTION") %>% 
  mutate(QUESTION = fct_drop(QUESTION)) %>% 
  separate_longer_delim(
    cols = value,
    delim = ","
  ) %>% 
  mutate(CHART_ACTION = factor(value, 
                                  levels =  c("NOTHING — just keep scrolling",
                                              "unfollow / block the source",
                                              "post a comment",
                                              "share / repost",
                                              "share / repost WITH comment",
                                              "look up more information about the topic or source"),
                                  labels =  c("nothing",
                                              "unfollow/block",
                                              "comment",  
                                              "share",
                                              "share w/ comment",
                                              "seek information")),
         PID = droplevels(PID)) %>% 
  select(-value,-QUESTION)


#6 DFS FOR TRIAL LEVEL ANALYSIS   ######################################################
#### CHART LEVEL DATA FRAME (LONG) #####################################################
# roll partway back up from questions 
# 1 row per participant X graph 
# unnest https://stackoverflow.com/questions/58035452/pivot-wider-outputs-s3-vctrs-list-of-objects
df_graphs_full <- df_questions %>% 
  pivot_wider(
    names_from = QUESTION,
    values_from = value 
  ) %>%  
  tidyr::unnest() %>%  # handle r coerces values to lists
  mutate(
    across(contains("MAKER_ID") | contains("MAKER_GENDER") | contains("MAKER_AGE"), factor),
    across(contains("_CONF") | contains("_LATENCY"), as.numeric),
    across(MAKER_DESIGN:MAKER_TRUST, as.numeric),
    across(CHART_LIKE:CHART_TRUST, as.numeric),
    ENCOUNTER = factor(ENCOUNTER),
    # loop_number = as.numeric(loop_number),
    # loop_number = ifelse(is.na(loop_number), 0, loop_number),
    MAKER_LATENCY = round(MAKER_LATENCY/60,2), #CHANGE TO MINS
    CHART_LATENCY = round(CHART_LATENCY/60,2), #CHANGE TO MINS
    MAKER_ID = factor( MAKER_ID,levels = c("business", "political", "education","news","organization","individual"))
  )
## Warning: `cols` is now required when using `unnest()`.
## ℹ Please use `cols = c()`.
#7 DFS FOR TRIAL LEVEL ANALYSIS w/o free response ######################################################
#### CHART LEVEL DATA FRAME (LONG) ##########################
## SUBSET OF COLUMNS EXCLUDING THE FREE RESPONSES and MULTISELECT
df_graphs <- df_graphs_full %>% 
  select( !where(is.character))

print("df_graphs_full is trial level dataset; df_graphs is trial level quantitative data only")
## [1] "df_graphs_full is trial level dataset; df_graphs is trial level quantitative data only"

1.1 Wrangle Participant Confidence

## CALCULATE AV CONFIDENCE FOR EACH SEMANTIC DIFFERENTIALS 
df_sds <- df_sd_questions_long %>% 
  group_by(PID, QUESTION) %>% 
  summarize(
    mean = mean(value), 
    sd = sd (value)
  ) 
## `summarise()` has grouped output by 'PID'. You can override using the `.groups`
## argument.
df_sds_WIDE <- df_sds %>% 
    pivot_wider(
      names_from = QUESTION,
      values_from = c(mean, sd)
  )

## CALCULATE AV CONFIDENCE ACROSS SEMANTIC DIFFERENTIALS 
df_grandsds <- df_sd_questions_long %>% 
  group_by(PID) %>% 
  summarize(
    SDIFF_mean = mean(value), 
    SDIFF_sd = sd(value)
  ) 


df_grandsds_LONG <- df_grandsds %>% 
  mutate(
      QUESTION = "SDIFF", 
      mean = SDIFF_mean, 
      sd = SDIFF_sd
) %>% select(-SDIFF_mean, -SDIFF_sd)
  


## CALCULATE AV CONFIDENCE FOR EACH CONFIDENCE
df_confq <- df_questions %>% 
  filter(QUESTION %in% ref_conf_questions)%>% 
  group_by(PID, QUESTION) %>% 
  summarize(
    mean = mean(as.numeric(value)), 
    sd = sd (as.numeric(value))
  ) 
## `summarise()` has grouped output by 'PID'. You can override using the `.groups`
## argument.
df_confq_WIDE <- df_confq %>% 
    pivot_wider(
      names_from = QUESTION,
      values_from = c(mean, sd)
  )

## CALCULATE AV CONFIDENCE ACROSS SEMANTIC DIFFERENTIALS 
df_grandconfs <- df_questions %>% 
  filter(QUESTION %in% ref_conf_questions)%>% 
  group_by(PID) %>% 
  summarize(
    CONF_mean = mean(as.numeric(value)), 
    CONF_sd = sd(as.numeric(value))
  )


df_grandconfs_LONG <- df_grandconfs %>% 
  mutate(
      QUESTION = "CONF", 
      mean = CONF_mean, 
      sd = CONF_sd
) %>% select(-CONF_mean, -CONF_sd) 
  



## JOIN TOGETHER WIDE
df_confidence_wide <- left_join(df_sds_WIDE, df_confq_WIDE, by="PID")
df_confidence_wide <- left_join(df_confidence_wide, df_grandsds, by = "PID")
df_confidence_wide <- left_join(df_confidence_wide, df_grandconfs, by = "PID")
df_confidence_wide <- left_join(df_confidence_wide, df_participants, by = "PID")


##JOIN TOGETHER LONG
df_confidence_long <- rbind(df_sds, df_confq, df_grandsds_LONG, df_grandconfs_LONG) %>% 
  mutate(
    QUESTION = factor(QUESTION)
  ) 

df_confidence_long <- left_join(df_confidence_long, df_participants, by="PID")

rm(df_sds, df_grandsds, df_confq, df_grandconfs, df_sds_WIDE, df_grandsds_LONG, df_confq_WIDE, df_grandconfs_LONG)

print("df_confidence_wide is a participant level dataset with by question and question type mean and df per participant")
## [1] "df_confidence_wide is a participant level dataset with by question and question type mean and df per participant"
print("df_confidence_long is a participant level dataset with by question and question type mean and df per participant")
## [1] "df_confidence_long is a participant level dataset with by question and question type mean and df per participant"
print("In both cases the means are over all 5 stimuli seen by each participant")
## [1] "In both cases the means are over all 5 stimuli seen by each participant"

1.1.1 Vibe Check

#special search for the word "vibe" .... because... vibes 

#get purpose and feedback questions and format as long
df_purpose <- df_data %>% 
  select(
    PID, PURPOSE, FEEDBACK
) %>% mutate(
  STIMULUS = "NONE"
) %>% pivot_longer(
  cols = c("PURPOSE", "FEEDBACK"), 
  names_to = "QUESTION",
  values_to = "value"
)

#get free response questions in long format 
df_free <- df_questions %>% 
  select(
    PID, STIMULUS, QUESTION, value
  ) %>% 
  filter(
    QUESTION %in% ref_free_response
  )


#join these dataframes 
df_vibes <- rbind(df_purpose, df_free)
##6992

#SEARCH FOR VIBES! 
df_vibes <- df_vibes %>% 
  filter(
    str_detect(value, regex(
      "vibe",
      ignore_case=TRUE, 
      multiline = TRUE,
      dotall = TRUE))
  )

## ADD PARTICIPANT DATA 
df_vibes <- left_join(df_vibes, df_participants, by="PID")

## WRITE VIBE DATA 
write.csv(df_vibes, file = "data/output/df_vibes.csv", na="")







#special search for the word "core" .... because... tumblr-core
#join these dataframes 
df_core <- rbind(df_purpose, df_free)
##6992

#SEARCH FOR VIBES! 
df_core <- df_core %>% 
  filter(
    str_detect(value, regex(
      "core",
      ignore_case=TRUE, 
      multiline = TRUE,
      dotall = TRUE))
  )

## ADD PARTICIPANT DATA 
df_core <- left_join(df_core, df_participants, by="PID")

## WRITE VIBE DATA 
write.csv(df_core, file = "data/output/df_core.csv", na="")

rm(df_free, df_purpose)

1.1.2 TODO Wrangle QDA

#8 TRIAL LEVEL DATA INCLUDING QDA CODING ############################################################
#### CHART LEVEL DATA FRAME (LONG) ##########################




# ### TODO PICK UP HERE
# 
# 
# # import coded QDA data
# # participant X chart level
# df_coded <- read_csv("data/input/QDA/s2_qda_maker.csv", col_names = TRUE)
# # create unique key for join
# df_coded <- df_coded %>% filter(
#   Scope != "pilot"   #filter out pilot data
# ) %>% mutate(
#   TRIAL = paste0(STIMULUS,"_",ID.Prolific)
# ) %>% select(
#   #TODO ADD CODED DATA THEY ARE ADDED
#   TRIAL, CODE_M_ID_SPECIFIC
# )
# 
# # create base dataframe
# df_graphs_coded <- df_graphs_full %>% mutate(
#   TRIAL = paste0(STIMULUS,"_",ID.Prolific)
# )
# 
# ##SANITY CHECK
# print("coded df & trials df have same num rows")
# nrow(df_coded) == nrow(df_graphs_coded)
# 
# df_graphs_coded <- dplyr::left_join(df_graphs_coded, df_coded, by="TRIAL") %>%
#   select(
#   #reorder cols
#   duration.min:D_politicsFiscal, BLOCK:MAKER_ID, MAKER_CONF:CHART_ACTION, STIMULUS, ID.Prolific,  TRIAL, MAKER_DETAIL, CODE_M_ID_SPECIFIC
# )
# 
# ##SANITY CHECK
# print("coded df & trials df have same num rows")
# nrow(df_graphs) == nrow(df_graphs_coded)
# 
# #####################################################################################
# ################ SANITY CHECKS ######################################################
# #CHECK BLOCKS PER PARTICIPANT
# # every participant should have 2 blocks
# check_subject_blocks <- df_questions %>%
#   group_by(ID.Prolific) %>% summarise(
#     n_block = length(unique(BLOCK))
#   )
# print("every participant should have two blocks, [block 0 + randomly assigned block]")
# all(check_subject_blocks$n_block==2)
# 
# 
# #CHECK STIMULI PER PARTICIPANT
# # every participant should have 5 graphs
# check_subject_graphs <- df_questions %>%
#   group_by(ID.Prolific) %>%summarise(
#     n_graphs = length(unique(STIMULUS))
#   ) #each participant should have five graphs
# print("every participant should have five stimuli [B0 + 4 stimuli in a block]]")
# all(check_subject_graphs$n_graphs==5)
# 
# 
# 
# 
# 
# 
# 
# 
# #CLEANUP
# rm(check_subject_blocks, check_subject_graphs, df_graphs_qda, df_coded)

1.2 Graphing Functions

############## RETURNS SD STACKED AND COLORED BY BY X
## LOOP STYLE
multi_sd <- function (df, left, right, x, y, color) {

  # g <- ggplot(df, aes(y = .data[[x]], x = {{y}}, color = {{color}}))+
  g <- ggplot(df, aes(y = .data[[x]], x = .data[[y]], color = .data[[color]]))+
  geom_boxplot(width = 0.5) +
  geom_jitter(width = 0.1, alpha=0.5) +
  scale_y_continuous(limits=c(-1,101)) +
  labs(x="", y="") +
  coord_flip() +
  guides(
    y = guide_axis_manual(labels = left),
    y.sec = guide_axis_manual(labels = right)
  ) + theme_minimal()

  return(g)
}


############## RETURNS SINGLE SD 
## LOOP STYLE
single_sd <- function (df, left, right, x) {

  g <- ggplot(df, aes(y = {{x}}, x = ""))+
  geom_boxplot(width = 0.5) +
  geom_jitter(width = 0.1, alpha=0.5) +
  scale_y_continuous(limits=c(-1,101)) +
  labs(x="", y="") +
  coord_flip() +
  guides(
    y = guide_axis_manual(labels = left),
    y.sec = guide_axis_manual(labels = right)
  ) + theme_minimal()

  return(g)
}



######## RETURNS SINGLE SD
##  APPLY STYLE
# plot_sd = function (data, column, type, split, boxplot) {
# 
#   ggplot(df, aes(y = .data[[column]], x="")) +
#     {if(boxplot) geom_boxplot(width = 0.5) } +
#     geom_jitter(width = 0.1, alpha=0.3, {if(split) aes(color=Distribution)}) +
#     {if(split) facet_grid(Distribution ~ .)} +
#     scale_y_continuous(limits=c(-1,101)) +
#     labs(x="", y="") +
#     coord_flip()  +
#     {if(type == "S")
#       guides(
#         y = guide_axis_manual(labels = ref_labels[column,"left"]),
#         y.sec = guide_axis_manual(labels = ref_labels[column,"right"])
#       )} +
#     {if(type == "Q")
#       guides(
#         y = guide_axis_manual(labels = ref_labels[q,"left"]),
#         y.sec = guide_axis_manual(labels = ref_labels[q,"right"])
#       )} +
#   theme_minimal()  +
#      labs (
#        caption = column
#      )
# }


# # ##  APPLY STYLE
plot_sd = function (data, column, type, facet, facet_by, boxplot) {

  ggplot(df, aes(y = .data[[column]], x="")) +
    {if(boxplot) geom_boxplot(width = 0.5) } +
    geom_jitter(width = 0.1, alpha=0.3, {if(facet) aes(color=.data[[facet_by]])}) +
    {if(facet) facet_grid(.data[[facet_by]] ~ .)} +
    scale_y_continuous(limits=c(-1,101)) +
    labs(x="", y="") +
    coord_flip()  +
    {if(type == "S")
      guides(
        y = guide_axis_manual(labels = ref_labels[column,"left"]),
        y.sec = guide_axis_manual(labels = ref_labels[column,"right"])
      )} +
    {if(type == "Q")
      guides(
        y = guide_axis_manual(labels = ref_labels[q,"left"]),
        y.sec = guide_axis_manual(labels = ref_labels[q,"right"])
      )} +
  theme_minimal()  +
     labs (
       caption = column
     ) + easy_remove_legend()
}

2 PROFILING

2.1 Descriptives

dfSummary(df_data %>% select(PID, duration.min, Distribution, Assigned.Block, PLATFORM, 
                             contains("D_"), Prolific.Name:Scope, contains("SCREEN_")), 
          headings = TRUE,
          plain.ascii  = FALSE,
          style        = 'grid',
          graph.magnif = 0.85,
          varnumbers = FALSE,
          valid.col    = FALSE,
          tmp.img.dir  = "/tmp")

2.1.1 Data Frame Summary

2.1.1.1

Dimensions: 318 x 30
Duplicates: 0

Variable Stats / Values Freqs (% of Valid) Graph Missing
PID
[factor]
1. R_11XUYk3OpC3HJPM
2. R_133GgyRbPsqfxPD
3. R_1A0uAWls6AhmTfr
4. R_1aUA7MBz4XYLbkl
5. R_1azgETg5gjYGzUf
6. R_1Bc6WwJjiC9VBVn
7. R_1Cq1wvlMciQUHh8
8. R_1D2krAejiiSE3Ln
9. R_1dz1Y8zwrsbEbB6
10. R_1EnkVRz2u8SWbla
[ 308 others ]
1 ( 0.3%)
1 ( 0.3%)
1 ( 0.3%)
1 ( 0.3%)
1 ( 0.3%)
1 ( 0.3%)
1 ( 0.3%)
1 ( 0.3%)
1 ( 0.3%)
1 ( 0.3%)
308 (96.9%)
0
(0.0%)
duration.min
[numeric]
Mean (sd) : 44.8 (25.7)
min < med < max:
10.9 < 38.4 < 227.6
IQR (CV) : 25.8 (0.6)
305 distinct values 0
(0.0%)
Distribution
[factor]
1. PROLIFIC
2. TUMBLR
240 (75.5%)
78 (24.5%)
0
(0.0%)
Assigned.Block
[factor]
1. 1
2. 2
3. 3
4. 4
5. 5
6. 6
55 (17.3%)
52 (16.4%)
52 (16.4%)
54 (17.0%)
53 (16.7%)
52 (16.4%)
0
(0.0%)
PLATFORM
[factor]
1. Facebook
2. Instagram
3. LinkedIn
4. Tumblr
5. Twitter/X
73 (23.0%)
93 (29.2%)
8 ( 2.5%)
75 (23.6%)
69 (21.7%)
0
(0.0%)
D_gender_collapsed
[factor]
1. female
2. male
3. other
157 (49.4%)
106 (33.3%)
55 (17.3%)
0
(0.0%)
D_gender
[factor]
1. Female
2. Male
3. Non-binary / third gender
4. Prefer not to say
5. Prefer to self-describe
157 (49.4%)
106 (33.3%)
38 (11.9%)
3 ( 0.9%)
14 ( 4.4%)
0
(0.0%)
D_gender_4_TEXT
[character]
1. genderqueer
2. Agender
3. gender fluid
4. genderqueer (dislike term
5. genderqueer trans man
6. Genderqueer wlw
7. genderqueer woman
8. My sex is female, but I d
9. queer
10. She/They
[ 3 others ]
2 (14.3%)
1 ( 7.1%)
1 ( 7.1%)
1 ( 7.1%)
1 ( 7.1%)
1 ( 7.1%)
1 ( 7.1%)
1 ( 7.1%)
1 ( 7.1%)
1 ( 7.1%)
3 (21.4%)
304
(95.6%)
D_race
[character]
1. White or Caucasian
2. Asian
3. Black or African American
4. Other
5. White or Caucasian,Asian
6. White or Caucasian,Black
7. White or Caucasian,Other
8. Prefer not to say
9. White or Caucasian,Americ
10. American Indian/Native Am
[ 4 others ]
213 (67.0%)
34 (10.7%)
34 (10.7%)
11 ( 3.5%)
5 ( 1.6%)
4 ( 1.3%)
4 ( 1.3%)
3 ( 0.9%)
3 ( 0.9%)
2 ( 0.6%)
5 ( 1.6%)
0
(0.0%)
D_education
[factor]
1. NA
2. some high school or less
3. high school diploma or GE
4. some college
5. associates or technical d
6. undergradudate degree
7. graduate or professional
0 ( 0.0%)
4 ( 1.3%)
28 ( 8.8%)
63 (19.8%)
33 (10.4%)
135 (42.5%)
55 (17.3%)
0
(0.0%)
D_employmentStatus
[factor]
1. A homemaker or stay-at-ho
2. Other
3. Retired
4. Student
5. Unemployed and looking fo
6. Working full-time
7. Working part-time
13 ( 4.1%)
12 ( 3.8%)
5 ( 1.6%)
31 ( 9.7%)
50 (15.7%)
156 (49.1%)
51 (16.0%)
0
(0.0%)
D_income
[factor]
1. Prefer not to say
2. Less than $25,000
3. $25,000-$49,999
4. $50,000-$74,999
5. $75,000-$99,999
6. $100,000-$149,999
7. $150,000 or more
14 ( 4.4%)
58 (18.2%)
64 (20.1%)
74 (23.3%)
37 (11.6%)
47 (14.8%)
24 ( 7.5%)
0
(0.0%)
D_work_detail
[character]
1. Unemployed
2. N/A
3. Student
4. homemaker
5. IT
6. Manager
7. Artist
8. Cashier
9. Consultant
10. Disabled
[ 271 others ]
9 ( 2.8%)
5 ( 1.6%)
5 ( 1.6%)
4 ( 1.3%)
3 ( 0.9%)
3 ( 0.9%)
2 ( 0.6%)
2 ( 0.6%)
2 ( 0.6%)
2 ( 0.6%)
279 (88.3%)
2
(0.6%)
D_zipcode
[character]
1. 30019
2. 11105
3. 20149
4. 27858
5. 28645
6. 37917
7. 60615
8. 77062
9. 85210
10. 95123
[ 295 others ]
3 ( 0.9%)
2 ( 0.6%)
2 ( 0.6%)
2 ( 0.6%)
2 ( 0.6%)
2 ( 0.6%)
2 ( 0.6%)
2 ( 0.6%)
2 ( 0.6%)
2 ( 0.6%)
296 (93.4%)
1
(0.3%)
D_age
[factor]
1. 18-24
2. 25-34
3. 35-44
4. 45-54
5. 55-64
6. 65+ years
67 (21.1%)
129 (40.6%)
59 (18.6%)
46 (14.5%)
14 ( 4.4%)
3 ( 0.9%)
0
(0.0%)
D_politicalParty
[factor]
1. No preference
2. Other
3. Independent
4. Republican
5. Democrat
12 ( 3.8%)
29 ( 9.1%)
87 (27.4%)
27 ( 8.5%)
163 (51.3%)
0
(0.0%)
D_politicalParty_OTHER
[character]
1. Socialist
2. leftist
3. socialist
4. Liberal
5. Progressive
6. A progressive who must vo
7. Anarchist
8. Freedom and peace party
9. Green
10. I don’t like any
[ 10 others ]
4 (13.8%)
3 (10.3%)
3 (10.3%)
2 ( 6.9%)
2 ( 6.9%)
1 ( 3.4%)
1 ( 3.4%)
1 ( 3.4%)
1 ( 3.4%)
1 ( 3.4%)
10 (34.5%)
289
(90.9%)
D_politicsSocial
[numeric]
Mean (sd) : 26.6 (26.4)
min < med < max:
0 < 20 < 100
IQR (CV) : 41.8 (1)
65 distinct values 0
(0.0%)
D_politicsFiscal
[numeric]
Mean (sd) : 33.7 (28.4)
min < med < max:
0 < 30 < 100
IQR (CV) : 43.8 (0.8)
76 distinct values 0
(0.0%)
Prolific.Name
[factor]
1. MAG_S2_PROLIFIC_BLUECOLLA
2. MAG_S2_PROLIFIC_DATACOLLA
3. MAG_S2_PROLIFIC_GENERAL_B
4. MAG_S2_PROLIFIC_GENERAL_B
5. MAG_S2_PROLIFIC_GENERAL_B
6. MAG_S2_PROLIFIC_GENERAL_B
7. MAG_S2_PROLIFIC_GENERAL_B
8. MAG_S2_PROLIFIC_GENERAL_B
9. MAG_S2_PROLIFIC_GENERAL_B
10. MAG_S2_PROLIFIC_GENERAL_B
[ 4 others ]
0 ( 0.0%)
0 ( 0.0%)
40 (12.6%)
20 ( 6.3%)
20 ( 6.3%)
18 ( 5.7%)
22 ( 6.9%)
21 ( 6.6%)
19 ( 6.0%)
19 ( 6.0%)
139 (43.7%)
0
(0.0%)
Qualtrics.Survey
[factor]
1. MAG_S2_PROLIFIC_GENERAL_1
2. MAG_S2_PROLIFIC_GENERAL_2
3. MAG_S2_PROLIFIC_GENERAL_3
4. MAG_S2_PROLIFIC_GENERAL_4
5. MAG_S2_PROLIFIC_GENERAL_5
6. MAG_S2_PROLIFIC_GENERAL_6
7. MAG_S2_PROLIFIC-BLUECOLLA
8. MAG_S2_PROLIFIC-DATACOLLA
9. MAG_S2_TUMBLR_FREE
10. MAG_S2_TUMBLR_PAID
40 (12.6%)
40 (12.6%)
40 (12.6%)
40 (12.6%)
40 (12.6%)
40 (12.6%)
0 ( 0.0%)
0 ( 0.0%)
14 ( 4.4%)
64 (20.1%)
0
(0.0%)
Qualtrics.URL
[factor]
1. https://mit.co1.qualtrics\ 2. https://mit.co1.qualtrics\ 3. https://mit.co1.qualtrics\ 4. https://mit.co1.qualtrics\ 5. https://mit.co1.qualtrics\ 6. https://mit.co1.qualtrics\ 7. https://mit.co1.qualtrics\ 8. https://mit.co1.qualtrics\ 9. https://mit.co1.qualtrics\ 10. https://mit.co1.qualtrics 40 (12.6%)
64 (20.1%)
40 (12.6%)
14 ( 4.4%)
40 (12.6%)
40 (12.6%)
0 ( 0.0%)
0 ( 0.0%)
40 (12.6%)
40 (12.6%)
0
(0.0%)
Description
[character]
1. TUMBLR-paid
2. block1-full
3. block3-fill
4. block4-20
5. block5-fill
6. block6-fill
7. block2-10
8. block2-fill
9. block4-fill
10. block5-20
[ 3 others ]
64 (20.1%)
40 (12.6%)
22 ( 6.9%)
21 ( 6.6%)
21 ( 6.6%)
21 ( 6.6%)
20 ( 6.3%)
20 ( 6.3%)
19 ( 6.0%)
19 ( 6.0%)
51 (16.0%)
0
(0.0%)
Sample
[factor]
1. blue-collar
2. data-collar
3. general-prolific
4. tumblr-free
5. tumblr-paid
0 ( 0.0%)
0 ( 0.0%)
240 (75.5%)
14 ( 4.4%)
64 (20.1%)
0
(0.0%)
Scope
[factor]
1. pilot
2. study2
0 ( 0.0%)
318 (100.0%)
0
(0.0%)
SCREEN_workMethod
[character]
1. btwn-50-75
2. btwn25-50
3. less-25
4. more-75
51 (16.0%)
18 ( 5.7%)
43 (13.5%)
206 (64.8%)
0
(0.0%)
SCREEN_workFunction
[character]
1. other
2. Operations
3. Education-Professional
4. IT
5. Research
6. Administration-PersonalAs
7. Healthcare-Professional
8. Design-Creative
9. Sales-Business-Developmen
10. Data-Analysis
[ 65 others ]
72 (22.6%)
24 ( 7.5%)
21 ( 6.6%)
18 ( 5.7%)
18 ( 5.7%)
15 ( 4.7%)
15 ( 4.7%)
12 ( 3.8%)
12 ( 3.8%)
10 ( 3.1%)
101 (31.8%)
0
(0.0%)
SCREEN_workFunction_TEXT
[character]
1. N/A
2. Student
3. Unemployed
4. n/a
5. unemployed
6. Logistics
7. Not applicable
8. Alternative Health
9. Business services
10. Business support
[ 45 others ]
7 ( 9.5%)
5 ( 6.8%)
4 ( 5.4%)
3 ( 4.1%)
3 ( 4.1%)
2 ( 2.7%)
2 ( 2.7%)
1 ( 1.4%)
1 ( 1.4%)
1 ( 1.4%)
45 (60.8%)
244
(76.7%)
SCREEN_socialMedia
[character]
1. Twitter,Reddit,LinkedIn
2. Facebook,Instagram,Reddit
3. Facebook,Twitter,Instagra
4. Facebook,Twitter,Youtube,
5. Facebook,Instagram,Reddit
6. Facebook,Twitter,Youtube,
7. Tumbler,LinkedIn
8. Facebook,Twitter,Reddit,L
9. Facebook,Youtube,Reddit,L
10. Facebook,Instagram
[ 183 others ]
8 ( 2.5%)
7 ( 2.2%)
7 ( 2.2%)
7 ( 2.2%)
6 ( 1.9%)
6 ( 1.9%)
6 ( 1.9%)
5 ( 1.6%)
5 ( 1.6%)
4 ( 1.3%)
257 (80.8%)
0
(0.0%)
SCREEN_socialMedia_TEXT
[character]
1. Discord
2. Bluesky
3. 4chan
4. alt tech, to avoid exactl
5. discord
6. Livejournal
7. mastedons,telegram
8. Monster, GlassDoor
9. nextdoor
10. Pinterest
[ 4 others ]
4 (22.2%)
2 (11.1%)
1 ( 5.6%)
1 ( 5.6%)
1 ( 5.6%)
1 ( 5.6%)
1 ( 5.6%)
1 ( 5.6%)
1 ( 5.6%)
1 ( 5.6%)
4 (22.2%)
300
(94.3%)

2.2 Plotting the dataframe

# print("Explore shapes of the prepared dataframes")
# 
# qacBase::df_plot(df_graphs)
# qacBase::df_plot(df_questions)
# qacBase::df_plot(df_sd_questions_long)
# qacBase::df_plot(df_sd_questions_wide)
# qacBase::barcharts(df_participants)

2.3 Demographics

2.3.1 Total Response Time

df <- df_data

## BOXPLOT — SURVEY RESPONSE TIME 
ggplot(df_data, aes(x=fct_rev(Assigned.Block), y=duration.min, color=Assigned.Block))+
  geom_boxplot(position=position_dodge(0.9))+
  geom_jitter(position=position_jitterdodge(), alpha = 0.3) +
  facet_grid(Distribution ~.) + 
  coord_flip() + 
  labs( y = "Survey Response Time (mins)", x="",
        title = "TOTAL Response Time by Sample",
        subtitle = "(distributions of response times are similar across samples, as expected)") +
  theme_minimal() + theme(legend.position = "none") 

## RIDGEPLOT — SURVEY RESPONSE TIME 
ggplot(df, aes(x = duration.min, y = fct_rev(Assigned.Block), fill = fct_rev(Assigned.Block))) +
  geom_density_ridges(scale=0.8) +
  # geom_boxplot()+
  stat_pointinterval()+
  theme_ridges() +
  scale_fill_discrete(direction=-1)+
  facet_grid(Distribution ~.) + 
  theme_minimal() + 
  theme(legend.position = "none") +
    labs( x = "Survey Response Time (mins)", y="",
        title = "TOTAL Response Time by Sample",
        subtitle = "(distributions of response times are similar across samples, as expected)") 
## Picking joint bandwidth of 7.37
## Picking joint bandwidth of 11

p.desc.duration <- psych::describe(df_data %>% filter(Distribution=="PROLIFIC") %>% pull(duration.min))
t.desc.duration <- psych::describe(df_data %>% filter(Distribution=="TUMBLR") %>% pull(duration.min))

PROLIFIC SAMPLE (n = 240 ) participant response times ranged from 13.97 to 216.18 minutes, with a mean response time of 42.49 minutes, SD = 21.15.

TUMBLR SAMPLE (n = 78 ) participant response times ranged from 10.88 to 227.57 minutes, with a mean response time of 51.93 minutes, SD = 35.47.

2.3.2 Age

# AGE by SAMPLE
ggplot(data = df_data, aes( x = Assigned.Block, fill = fct_rev(D_age) )) +
  geom_bar(position = "stack") +
  facet_grid(Distribution ~ .) + 
  labs( title = "AGE by Sample", subtitle = "Expect similiar across samples", x = "") +
  scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) + 
  theme_minimal() +
  easy_add_legend_title("")

# OVERALL AGE
ggstatsplot::ggbarstats(df_data, x= D_age, y=Distribution,
                          results.subtitle = FALSE) +
    theme_minimal() +
    scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) + 
    easy_add_legend_title("Age") + 
    labs (title = "Participant Age")

2.3.3 Gender

# GENDER by SAMPLE
ggplot(data = df_data, aes( x = Assigned.Block, fill = fct_rev(D_gender) )) +
  geom_bar(position = "stack") +
  facet_grid(Distribution ~ .) + 
  labs( title = "Gender by Sample", subtitle = "Expect similiar across samples", x = "") +
  scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) + 
  theme_minimal() 

  # easy_add_legend_title("")


# OVERALL GENDER
ggstatsplot::ggbarstats(df_data, x= D_gender, y=Distribution, 
                        results.subtitle = FALSE) +
    theme_minimal() +
    scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) + 
    easy_add_legend_title("Gender") + 
    labs (title = "Participant Gender")

# COLLAPSED GENDER
ggstatsplot::ggbarstats(df_data, x= D_gender_collapsed, y=Distribution,
                        results.subtitle = FALSE) +
    theme_minimal() +
    scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) + 
    easy_add_legend_title("Gender") + 
    labs (title = "Participant Gender (Collapsed)")

#PROLIFIC
df.p <- df_data %>% filter(Distribution == "PROLIFIC")
desc.gender.p <- table(df.p$D_gender) %>% prop.table() 
names(desc.gender.p) <- levels(df.p$D_gender)
p_participants <- nrow(df.p)


#TUMBLR
df.t <- df_data %>% filter(Distribution == "TUMBLR")
desc.gender.t <- table(df.t$D_gender) %>% prop.table() 
names(desc.gender.t) <- levels(df.t$D_gender)
t_participants <- nrow(df.t)


title = "Participant Gender — Self Describe"
cols = c("Text","Count")
cont <- table(df_data$D_gender_4_TEXT)
cont %>% addmargins() %>% kbl(caption = title, col.names = cols) %>%  kable_classic()
Participant Gender — Self Describe
Text Count
Agender 1
gender fluid 1
genderqueer 2
genderqueer (dislike term non-binary) 1
genderqueer trans man 1
Genderqueer wlw 1
genderqueer woman 1
My sex is female, but I don’t think of myself as any gender. I’m in a female body, but my being isn’t tied to this cultural construct. 1
queer 1
She/They 1
Trans male 1
Transgender male 1
transmasc 1
Sum 14

240 individuals from Prolific participated in Study 2, ( 54% Female, 42% Male, 3% Non-binary, 1% Other).

Note that a higher proportion of participants recruited from Tumblr represent identities other than cis-gender Female and cis-gender Male. 78 individuals from Tumblr participated in Study 2, ( 36% Female, 5% Male, 40% Non-binary, 19% Other).

2.3.4 Education

df <- df_data

# EDUCATION by SAMPLE
ggplot(data = df, aes( x = Assigned.Block, fill = D_education )) +
  geom_bar(position = "stack") +
  facet_grid(Distribution ~ .)+
  scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) + 
  labs( title = "EDUCATION by Sample",
        subtitle = "Expect similiar across samples") +
  theme_minimal() +
  easy_add_legend_title("Education")

# INCOME BY EDUCATION
ggstatsplot::ggbarstats(df_data, x= D_education, y=D_income) +
    theme_minimal() +
    scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) +
    labs (title = "INCOME by EDUCATION")

2.3.5 Political Values

df <- df_graphs

# FISCAL POLITICAL SAMPLE

leftside <- rep("left-leaning", length(ref_blocks))
rightside <- rep("right-leaning", length(ref_blocks))
g <- ggplot(df_data, aes(x=fct_rev(Assigned.Block), y=D_politicsFiscal, color=Assigned.Block)) +
  geom_boxplot(position=position_dodge(0.9), width = 0.5)+
  geom_jitter(position=position_jitterdodge(), alpha = 0.3) +
  facet_grid(Distribution ~ .)+
  labs( title = "FISCAL VALUES by Sample",
        subtitle = "(expect similar values across samples)",
         y = "Fiscal Politics", x = "") +
  # easy_add_legend_title("Sample") +
  theme_minimal() +
  coord_flip()
g + guides(
  y = guide_axis_manual(
  breaks = ref_blocks,
  labels = leftside
  ),
  y.sec = guide_axis_manual(
  breaks = ref_blocks,
  labels = rightside
))

# SOCIAL POLITICAL SAMPLE

leftside <- rep("left-leaning", length(ref_blocks))
rightside <- rep("right-leaning", length(ref_blocks))
g <- ggplot(df_data, aes(x=fct_rev(Assigned.Block), y=D_politicsSocial, color=Assigned.Block)) +
  geom_boxplot(position=position_dodge(0.9), width = 0.5)+
  geom_jitter(position=position_jitterdodge(), alpha = 0.3) +
  facet_grid(Distribution ~ .)+
  labs( title = "SOCIAL VALUES by Sample",
        subtitle = "(expect similar values across samples)",
         y = "Social Politics", x = "") +
  # easy_add_legend_title("Sample") +
  theme_minimal() +
  coord_flip()
g + guides(
  y = guide_axis_manual(
  breaks = ref_blocks,
  labels = leftside
  ),
  y.sec = guide_axis_manual(
  breaks = ref_blocks,
  labels = rightside
))

r_politics <- nrow(df_data %>% filter(D_politicsSocial >= 50)) / nrow(df_data)
l_politics <- nrow(df_data %>% filter(D_politicsSocial < 50)) / nrow(df_data)
r_fiscal <- nrow(df_data %>% filter(D_politicsFiscal >= 50)) / nrow(df_data)
l_fiscal <- nrow(df_data %>% filter(D_politicsFiscal < 50)) / nrow(df_data)
 
rm(g)



#PROLIFIC
df.p <- df_data %>% filter(Distribution == "PROLIFIC")
desc.fiscal.p <- psych::describe(df.p$D_politicsFiscal)
desc.social.p <- psych::describe(df.p$D_politicsSocial)


#TUMBLR
df.t <- df_data %>% filter(Distribution == "TUMBLR")
desc.fiscal.t <- psych::describe(df.t$D_politicsFiscal)
desc.social.t <- psych::describe(df.t$D_politicsSocial)

For the 240 participants recruited from Prolific, a spectrum of Social Political values [ranging from 0 (left-leaning) to 100 (right-leaning)] ranged from 0 to 100, with a mean value of 32.27, SD = 27.49. A spectrum of Fiscal Political values [ranging from 0 (left-leaning) to 100 (right-leaning)] ranged from 0 to 100, with a mean value of 39.4, SD = 29.06.

For the 78 participants recruited from Tumblr, a spectrum of Social Political values [ranging from 0 (left-leaning) to 100 (right-leaning)] ranged from 0 to 50, with a mean value of 9.09, SD = 11.06. A spectrum of Fiscal Political values [ranging from 0 (left-leaning) to 100 (right-leaning)] ranged from 0 to 91, with a mean value of 16.08, SD = 16.97.

Overall, 77.99% of respondents identify with left-leaning social values (vs) 22.01% identifying as right-leaning; while 66.35% of respondents reported left-leaning fiscal values (vs) 33.65% identify as right-leaning.

2.3.6 Political Values — Relationship

df <- df_graphs %>% select(PID, Assigned.Block, Distribution, D_politicsSocial, D_politicsFiscal, D_politicalParty) %>% 
  mutate(
    d_social = D_politicsSocial,
    d_fiscal = D_politicsFiscal
  )

ggplot(df, aes(x = d_social, y = d_fiscal, color = D_politicalParty)) + 
  geom_point() + 
  geom_hline(yintercept = 50) +
  geom_vline(xintercept = 50) + 
  facet_grid(Distribution~D_politicalParty)+
  labs(
    title = "Social and Fiscal Political Values by Political Party Affiliation",
    x = "Social Values", y = "Fiscal Values"
  )+
  theme_minimal() + 
  easy_remove_legend() + 
  easy_remove_axes()

2.3.7 Politcal Affiliation

# POLITICAL PARTY
ggplot(data = df_data, aes( fill = D_politicalParty, x = Assigned.Block )) +
  geom_bar(position = "fill") +
  facet_grid(Distribution ~ .) + 
  scale_fill_viridis(discrete=TRUE, option="viridis", direction=-1) + 
  labs( title = "POLITICAL PARTY by Sample",
        subtitle = "Expect similiar across samples", x = "") +
  theme_minimal() +
  easy_add_legend_title("")

2.3.8 Platform Choice

#PLATFORM CHOICE
ggplot( df_data, aes( x = Assigned.Block, fill = PLATFORM)) +
  geom_bar(position = "stack") +
  facet_grid(Distribution ~ .) + 
  labs( title = "PLATFORM CHOICE by Sample",
        subtitle = "Expect similiar across samples", x = "") +
  scale_fill_viridis(discrete=TRUE, option="viridis") + 
  easy_add_legend_title("") +
  theme_minimal()

2.3.9 Confidence

How confident are participants in their attributions? We want to explore this from several perspectives, including: participant-level differences, question-level differences, and stimulus-level differences

2.3.9.1 Distribution of Confidence

## QUESTION LEVEL CONFIDENCE 
df <- df_questions %>%
  filter(QUESTION %in% ref_conf_questions) %>% 
  mutate(value= as.numeric(value))


(g <- ggplot(df, aes(x = value, fill = QUESTION)) + 
  geom_histogram() + 
  facet_wrap(. ~ QUESTION) +  #, scales = "free", space = "free", drop = TRUE
  theme_minimal() + 
  labs(title = "Distrubtion of Confidence by Question") + 
  easy_remove_legend())

ggsave(plot = g, path="figs/histograms", filename =paste0("confidence_","histograms.png"), units = c("in"))
## QUESTION LEVEL CONFIDENCE 
df <- df_questions %>%
  filter(QUESTION %in% ref_sd_questions) %>% 
  mutate(value= as.numeric(value))


(g <- ggplot(df, aes(x = value, fill = QUESTION)) + 
  geom_histogram() + 
  facet_grid( STIMULUS~ QUESTION) + 
  theme_minimal() + 
  labs(title = "Distrubtion of SEMANTIC DIFFERENTIAL SCALES by Question") + 
  easy_remove_legend())
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(plot = g, path="figs/histograms", filename =paste0("sd_scales_by_question_stimulus","histograms.png"), units = c("in"), width = 20, height = 30)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## QUESTION LEVEL CONFIDENCE 
df <- df_questions %>%
  filter(QUESTION %in% ref_sd_questions) %>% 
  mutate(value= as.numeric(value))


(g <- ggplot(df, aes(x = value, fill = STIMULUS_CATEGORY)) + 
  geom_histogram() + 
  facet_grid( STIMULUS_CATEGORY~ QUESTION) + 
  theme_minimal() + 
  labs(title = "Distrubtion of SEMANTIC DIFFERENTIAL SCALES by Question") + 
  easy_remove_legend())
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(plot = g, path="figs/histograms", filename =paste0("sd_scales_by_question_category","histograms.png"), units = c("in"), width = 20, height = 20)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## STIMULUS LEVEL CONFIDENCE 

df <- df_questions %>%
  filter(QUESTION %in% ref_conf_questions) %>% 
  mutate(value= as.numeric(value)) 

(g <- ggplot(df, aes(x = value, fill = fct_rev(STIMULUS_CATEGORY))) + 
  geom_histogram() + 
  facet_grid(rows = vars(BLOCK),
             cols = vars(fct_rev(STIMULUS_CATEGORY)), scales = "free_y", space = "free", drop = TRUE) +
  theme_minimal()+  
  labs(title = "Distrubtion of Confidence by Stimulus"))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(plot = g, path="figs/histograms", filename =paste0("confidence_matrix_","histograms.png"), units = c("in"), width = 20, height = 20)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

2.3.9.2 Individual Participant Confidence

df <- df_questions %>%
  filter(QUESTION %in% c("MAKER_CONF", "AGE_CONF", "GENDER_CONF", "TOOL_CONF")) %>% 
  mutate(value= as.numeric(value)) %>% 
  group_by(PID, QUESTION) %>% 
  summarise(
    mean_conf = mean(value),
    sd_conf = sd(value),
    var_conf = var(value)
  )


ggplot(df, aes( x = mean_conf)) +
  geom_histogram() +
  facet_wrap( ~ QUESTION) + 
  labs(
    title = "Mean Participant confidence by Question",
    subtitle = "(averaging across all 5 stimuli per participant)"
  ) + 
  theme_minimal()

ggplot(df, aes( x = sd_conf)) +
  geom_histogram() +
  facet_wrap( ~ QUESTION) + 
  labs(
    title = "Standard Deviation of Participant confidence by Question",
    subtitle = "(averaging across all 5 stimuli per participant)"
  ) +
  theme_minimal()  

## BY PARTICIPANT
df <- df_questions %>%
  filter(QUESTION %in% c("MAKER_CONF", "AGE_CONF", "GENDER_CONF", "TOOL_CONF")) %>% 
  mutate(value= as.numeric(value)) %>% 
  group_by(PID) %>% 
  summarise(
    mean_conf = mean(value),
    sd_conf = sd(value),
    var_conf = var(value)
  )

ggplot(df, aes( x = mean_conf)) +
  geom_histogram(binwidth = 2) +
  
  labs(
    title = "Mean Participant Confidence ",
    subtitle = "(averaging across 4 confidence questions and all 5 stimuli per participant)"
  ) +
  theme_minimal()  

ggplot(df, aes( x = sd_conf)) +
  geom_histogram(binwidth = 2) +
  xlim(0,100)+
  labs(
    title = "Standard Deviation of Participant Confidence ",
    subtitle = "(averaging across 4 confidence questions and all 5 stimuli per participant)"
  ) +
  theme_minimal()  

2.3.9.3 Predicting Confidence

df <- df_graphs %>% left_join(df_confidence_wide, by="PID") %>% 
  select(-contains(".y")) %>% 
  rename_with(
    stringr::str_replace, 
      pattern = ".x", replacement = ""
  )


## maker confidence by age
(m <- ggplot(df, aes( color = CONF_mean, x = D_age, y = MAKER_CONF, text = PID)) + 
  geom_jitter(size = 0.5, width = .2, height = 0) + 
  scale_colour_paletteer_c("ggthemes::Red-Black Diverging", direction = -1)+
  labs (
    title = "Confidence in MAKER_ID by Participant Age",
    subtitle = "color by average participant confidence: red HIGH, black LOW",
    x = "participant age"
  ) + theme_minimal() )

# ggplotly(m)

## age maker confidence by age
(a <- ggplot(df, aes( color = CONF_mean, x = D_age, y = AGE_CONF, text = PID)) + 
  geom_jitter(size = 0.5, width = .2, height = 0) + 
  scale_colour_paletteer_c("ggthemes::Red-Black Diverging", direction = -1)+
  labs (
    title = "Confidence in MAKER_AGE by Participant Age",
    subtitle = "color by average participant confidence: red HIGH, black LOW",
    x = "participant age"
  ) + theme_minimal() )

# ggplotly(a)

## age maker confidence by age
(a <- ggplot(df, aes( color = CONF_mean, x = D_age, y = GENDER_CONF, text = PID)) + 
  geom_jitter(size = 0.5, width = .2, height = 0) + 
  scale_colour_paletteer_c("ggthemes::Red-Black Diverging", direction = -1)+
  labs (
    title = "Confidence in MAKER_GENDER by Participant Age",
    subtitle = "color by average participant confidence: red HIGH, black LOW",
    x = "participant age"
  ) + theme_minimal() )

# ggplotly(a)

3 ATTRIBUTIONS

3.1 MAKER ID

3.1.1 Maker ID by Stimulus

df <- df_graphs 

#MAKER IDENTIFICATION
ggplot( df, aes( x = fct_rev(STIMULUS), fill = MAKER_ID)) +
  geom_bar(position = "fill") +
  facet_grid(fct_rev(STIMULUS_CATEGORY) ~ Distribution, scales = "free", space = "free") + 
  labs( title = "MAKER ID by Stimulus (grouped by CATEGORY) ",
        subtitle = "", x = "") +
  scale_fill_viridis(discrete=TRUE, option="viridis") +
  coord_flip()+
  easy_add_legend_title("") +
  theme_minimal()

#MAKER IDENTIFICATION
ggplot( df, aes( x = fct_rev(STIMULUS), fill = MAKER_ID)) +
  geom_bar(position = "fill") +
  facet_grid(fct_rev(STIMULUS_CATEGORY) ~ ., scales = "free", space = "free") + 
  labs( title = "MAKER ID by Stimulus (grouped by CATEGORY) ",
        subtitle = "", x = "") +
  scale_fill_viridis(discrete=TRUE, option="viridis") +
  coord_flip()+
  easy_add_legend_title("") +
  theme_minimal()

3.1.2 Maker ID by Category

df <- df_graphs 

#MAKER IDENTIFICATION
ggplot( df, aes( x = STIMULUS_CATEGORY, fill = MAKER_ID)) +
  geom_bar(position = "fill") +
  facet_grid(Distribution ~ .) + 
  labs( title = "MAKER ID by Stimulus Category",
        subtitle = "", x = "") +
  scale_fill_viridis(discrete=TRUE, option="viridis") +
  coord_flip()+
  easy_add_legend_title("") +
  theme_minimal()

#MAKER IDENTIFICATION
ggplot( df, aes( x = STIMULUS_CATEGORY, fill = MAKER_ID)) +
  geom_bar(position = "fill") +
  labs( title = "MAKER ID by Stimulus Category",
        subtitle = "", x = "") +
  scale_fill_viridis(discrete=TRUE, option="viridis") +
  coord_flip()+
  easy_add_legend_title("") +
  theme_minimal()

3.1.3 Maker ID & Confidence by Stimulus

#FILTER DATAFRAME
df <- df_graphs 

######### MAKER ID AND CONFIDENCE ##############
# MAKER_ID by Sample and BEHAVIOR 
a <- ggplot (df, aes( x = fct_rev(STIMULUS), fill = MAKER_ID)) + 
  # geom_bar(width = 0.5, position = position_dodge(0.5)) +
  geom_bar(position = "fill", width = 0.8) + 
  scale_fill_viridis(discrete=TRUE, option="viridis") +
  # facet_grid( df$ENCOUNTER) + 
  labs (x = "") + 
  theme_minimal() 

# MAKER_CONFIDENCE by IDENTIFICATION  
b <- ggplot(df, aes( x = fct_rev(STIMULUS), y = MAKER_CONF, color = MAKER_ID)) + 
  geom_boxplot(position=position_dodge(0.9), width = 0.5)+
  geom_jitter(position=position_jitterdodge(), alpha = 0.4) +
  scale_color_viridis(discrete=TRUE, option="viridis") +
  labs (title = "") +
  labs (x = "") + 
  theme_minimal() 

(p <- (a / b )  + plot_annotation(
  title = 'MAKER ID & CONFIDENCE by Stimulus',
  subtitle = '',
  caption = ''))

###################################################
rm(a,b,p)

3.1.4 Maker ID & Confidence

#FILTER DATAFRAME
df <- df_graphs 

######### MAKER ID AND CONFIDENCE ##############
# MAKER_ID by Sample and BEHAVIOR 
a <- ggplot (df, aes( x = fct_rev(STIMULUS_CATEGORY), fill = MAKER_ID)) + 
  geom_bar(position = "fill") +
  scale_fill_viridis(discrete=TRUE, option="viridis") +
  # facet_grid( df$ENCOUNTER) + 
  labs (x = "") + 
  theme_minimal() 

# MAKER_CONFIDENCE by IDENTIFICATION  
b <- ggplot(df, aes( x = fct_rev(STIMULUS_CATEGORY), y = MAKER_CONF, color = MAKER_ID)) + 
  geom_boxplot(position=position_dodge(0.9))+
  geom_jitter(position=position_jitterdodge(), alpha = 0.2) + 
  scale_color_viridis(discrete=TRUE, option="viridis") +
  # labs (title = "MAKER-ID-CONFIDENCE") +
  labs (x = "STIMULUS CATEGORY") + 
  theme_minimal() 

(p <- (a / b )  + plot_annotation(
  title = 'MAKER ID & CONFIDENCE',
  subtitle = '',
  caption = ''))

rm(a,b,p)

###################################################

3.2 MAKER AGE

3.2.1 Maker Age by Stimulus

df <- df_graphs 

#MAKER AGE
ggplot( df, aes( x = fct_rev(STIMULUS), fill = MAKER_AGE)) +
  geom_bar(position = "fill") +
  #SCALES & SPACE FREE SUPPRESSES EMPTY ROWS
  facet_grid(fct_rev(STIMULUS_CATEGORY) ~ Distribution, scales = "free", space = "free") + 
  labs( title = "MAKER AGE by Stimulus (grouped by CATEGORY) ",
        subtitle = "", x = "") +
  scale_fill_viridis(discrete=TRUE, option="viridis") +
  coord_flip()+
  # easy_add_legend_title("") +
  theme_minimal()

3.2.2 Maker Age by Category

df <- df_graphs 

#MAKER AGE
ggplot( df, aes( x = STIMULUS_CATEGORY, fill = MAKER_AGE)) +
  geom_bar(position = "fill") +
  facet_grid(Distribution ~ .) + 
  labs( title = "MAKER AGE by Stimulus Category",
        subtitle = "", x = "") +
  scale_fill_viridis(discrete=TRUE, option="viridis") +
  # facet_grid(rows= vars(D_age)) +
  coord_flip()+
  theme_minimal()

3.2.3 Maker Age & Confidence by Stimulus

#FILTER DATAFRAME
df <- df_graphs 

######### MAKER ID AND CONFIDENCE ##############
# MAKER_ID by Sample and BEHAVIOR 
a <- ggplot (df, aes( x = fct_rev(STIMULUS), fill = MAKER_AGE)) + 
  geom_bar(position = "fill", width = 0.8) +
  scale_fill_viridis(discrete=TRUE, option="viridis") +
  # facet_grid( df$ENCOUNTER) + 
  labs (x = "") + 
  theme_minimal() 

# MAKER_CONFIDENCE by IDENTIFICATION  
b <- ggplot(df, aes( x = fct_rev(STIMULUS), y = MAKER_CONF, color = MAKER_AGE)) + 
  geom_boxplot(position=position_dodge(0.9), width = 0.5)+
  geom_jitter(position=position_jitterdodge(), alpha = 0.2) + 
  scale_color_viridis(discrete=TRUE, option="viridis") +
  labs (title = "") +
  labs (x = "STIMULUS CATEGORY") + 
  theme_minimal() 

(p <- (a / b )  + plot_annotation(
  title = 'MAKER AGE & CONFIDENCE by Category',
  subtitle = '',
  caption = ''))

###################################################

rm(a,b,p)

3.2.4 Maker Age & Confidence by Category

#FILTER DATAFRAME
df <- df_graphs 

######### MAKER ID AND CONFIDENCE ##############
# MAKER_AGE by Sample and BEHAVIOR 
a <- ggplot (df, aes( x = fct_rev(STIMULUS_CATEGORY), fill = MAKER_AGE)) + 
  geom_bar(position = "fill") + #dodge
  scale_fill_viridis(discrete=TRUE, option="viridis") +
  # facet_grid( df$ENCOUNTER) + 
  labs (x = "") + 
  theme_minimal() 

# MAKER_CONFIDENCE by IDENTIFICATION  
b <- ggplot(df, aes( x = fct_rev(STIMULUS_CATEGORY), y = AGE_CONF, color = MAKER_AGE)) + 
  geom_boxplot(position=position_dodge(0.9))+
  geom_jitter(position=position_jitterdodge(), alpha = 0.2) + 
  scale_color_viridis(discrete=TRUE, option="viridis") +
  labs (x = "STIMULUS CATEGORY") + 
  theme_minimal() 

(p <- (a / b )  + plot_annotation(
  title = 'MAKER AGE & CONFIDENCE by Category',
  subtitle = '',
  caption = ''))

###################################################

rm(a,b,p)

3.3 MAKER GENDER

3.3.1 Maker Gender by Stimulus

df <- df_graphs 

#MAKER GENDER
ggplot( df, aes( x = fct_rev(STIMULUS), fill = MAKER_GENDER)) +
  geom_bar(position = "fill") +
  facet_grid(fct_rev(STIMULUS_CATEGORY) ~ Distribution, scales = "free", space = "free") + 
  labs( title = "MAKER GENDER by Stimulus (grouped by CATEGORY) ",
        subtitle = "", x = "") +
  scale_fill_viridis(discrete=TRUE, option="viridis") +
  coord_flip()+
  # easy_add_legend_title("") +
  theme_minimal()

### Maker Gender by Category

df <- df_graphs 

#MAKER GENDER
ggplot( df, aes( x = STIMULUS_CATEGORY, fill = MAKER_GENDER)) +
  geom_bar(position = "fill") +
  facet_grid(Distribution ~ .)+
  labs( title = "MAKER GENDER by Stimulus Category",
        subtitle = "", x = "") +
  scale_fill_viridis(discrete=TRUE, option="viridis") +
  # facet_grid(rows= vars(D_age)) +
  coord_flip()+
  theme_minimal()

3.3.2 Maker Gender & Confidence by Stimulus

#FILTER DATAFRAME
df <- df_graphs 

######### MAKER GENDER AND CONFIDENCE ##############
# MAKER_GENDER by STIMULUS
a <- ggplot (df, aes( x = fct_rev(STIMULUS), fill = MAKER_GENDER)) + 
  geom_bar(position = "fill", width = 0.8) +
  scale_fill_viridis(discrete=TRUE, option="viridis") +
  # facet_grid( df$ENCOUNTER) + 
  labs (x = "") + 
  theme_minimal() 

# GENDER_CONFIDENCE by STIMULUS
b <- ggplot(df, aes( x = fct_rev(STIMULUS), y = GENDER_CONF, color = MAKER_GENDER)) + 
  geom_boxplot(position=position_dodge(0.9), width = 0.5)+
  geom_jitter(position=position_jitterdodge(), alpha = 0.2) + 
  scale_color_viridis(discrete=TRUE, option="viridis") +
  labs (x = "STIMULUS") + 
  theme_minimal() 

(p <- (a / b )  + plot_annotation(
  title = 'MAKER GENDER & CONFIDENCE by Stimulus',
  subtitle = '',
  caption = ''))

###################################################

rm(a,b,p)

3.3.3 Maker Gender & Confidence by Category

#FILTER DATAFRAME
df <- df_graphs 

######### MAKER GENDER AND CONFIDENCE ##############
a <- ggplot (df, aes( x = fct_rev(STIMULUS_CATEGORY), fill = MAKER_GENDER)) + 
  geom_bar(position = "fill") +
  scale_fill_viridis(discrete=TRUE, option="viridis") +
  # facet_grid( df$ENCOUNTER) + 
  labs (x = "") + 
  theme_minimal() 

# GENDER_CONFIDENCE by GENDER  
b <- ggplot(df, aes( x = fct_rev(STIMULUS_CATEGORY), y = GENDER_CONF, color = MAKER_GENDER)) + 
  geom_boxplot(position=position_dodge(0.9))+
  geom_jitter(position=position_jitterdodge(), alpha = 0.2) + 
  scale_color_viridis(discrete=TRUE, option="viridis") +
  labs (x = "STIMULUS CATEGORY") + 
  theme_minimal() 

(p <- (a / b )  + plot_annotation(
  title = 'MAKER GENDER & CONFIDENCE by Category',
  subtitle = '',
  caption = ''))

###################################################

rm(a,b,p)

3.4 TOOL CHOICE

3.4.1 Tool ID by Stimulus

df <- df_tools 

# TOOL CHOICE BY STIMULUS
ggplot(data = df, aes( fill = fct_rev(TOOL_ID), x = fct_rev(STIMULUS) )) +
  geom_bar(position = "fill") +
  coord_flip() + 
  facet_grid(fct_rev(STIMULUS_CATEGORY) ~ Distribution, scales = "free", space = "free") + 
  scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
  scale_y_continuous(labels = scales::percent) + 

  labs( title = "TOOL ID by Stimulus (grouped by Category)",
        subtitle = "", x = "") +
  easy_add_legend_title("TOOL_ID") +
  theme_minimal() 

3.4.2 Tool ID by Category

df <- df_tools 

# TOOL CHOICE BY STIMULUS
ggplot(data = df, aes( fill = fct_rev(TOOL_ID), x = STIMULUS_CATEGORY )) +
  geom_bar(position = "fill") +
  facet_grid(Distribution ~ .) + 
  coord_flip() + 
  scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
  scale_y_continuous(labels = scales::percent) + 
  labs( title = "TOOL ID by Category",
        subtitle = "", x = "") +
  easy_add_legend_title("TOOL_ID") +
  theme_minimal() 

3.4.3 Tool ID & Confidence by Stimulus

#FILTER DATAFRAME
df <- df_tools 

######### TOOL ID AND CONFIDENCE ##############
a <- ggplot (df, aes( x = fct_rev(STIMULUS), fill = fct_rev(TOOL_ID))) + 
  geom_bar(position = "fill", width = 0.8) +
  scale_fill_paletteer_d("awtools::a_palette", direction = 1) +
  # facet_grid( df$ENCOUNTER) + 
  labs (x = "") +
  easy_add_legend_title("TOOL ID")+
  theme_minimal() 

# TOOL_CONFIDENCE by STIMULUS
b <- ggplot(df, aes( x = fct_rev(STIMULUS), y = TOOL_CONF, color = fct_rev(TOOL_ID))) + 
  geom_boxplot(position=position_dodge(0.9), width = 0.6)+
  geom_jitter(position=position_jitterdodge(), alpha = 0.2) + 
  scale_color_paletteer_d("awtools::a_palette", direction = 1) +
  labs (x = "STIMULUS") + 
  easy_add_legend_title("TOOL ID")+
  theme_minimal() 

(p <- (a / b )  + plot_annotation(
  title = 'TOOL ID & CONFIDENCE by Stimulus',
  subtitle = '',
  caption = ''))

###################################################

rm(a,b,p)

3.4.4 Tool ID & Confidence by Category

#FILTER DATAFRAME
df <- df_tools 

######### TOOL ID AND CONFIDENCE ##############
a <- ggplot (df, aes( x = fct_rev(STIMULUS_CATEGORY), fill = fct_rev(TOOL_ID))) + 
  geom_bar(position = "fill") +
  scale_fill_paletteer_d("awtools::a_palette", direction = 1) +
  # facet_grid( df$ENCOUNTER) + 
  labs (x = "") + 
  easy_add_legend_title("TOOL ID")+
  theme_minimal() 

# TOOL CONF 
b <- ggplot(df, aes( x = fct_rev(STIMULUS_CATEGORY), y = TOOL_CONF, color = fct_rev(TOOL_ID))) + 
  geom_boxplot(position=position_dodge(0.9))+
  geom_jitter(position=position_jitterdodge(), alpha = 0.2) + 
  scale_color_paletteer_d("awtools::a_palette", direction = 1) +
  labs (x = "STIMULUS CATEGORY") + 
  easy_add_legend_title("TOOL ID")+
  theme_minimal() 

(p <- (a / b )  + plot_annotation(
  title = 'TOOL ID & CONFIDENCE by Category',
  subtitle = '',
  caption = ''))

###################################################

rm(a,b,p)

3.5 ENCOUNTER CHOICE

3.5.1 Encounter Choice by Stimulus

df <- df_graphs 

#MAKER IDENTIFICATION
ggplot( df, aes( x = fct_rev(STIMULUS), fill = ENCOUNTER)) +
  geom_bar(position = "fill") +
  facet_grid(fct_rev(STIMULUS_CATEGORY) ~ Distribution, scales = "free", space = "free") + 
  labs( title = "MAKER ENCOUNTER by Stimulus (grouped by CATEGORY) ",
        subtitle = "", x = "") +
  scale_fill_brewer(palette = "Dark2") + 
  # scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
  coord_flip()+
  # easy_add_legend_title("") +
  theme_minimal()

3.5.2 Encounter Choice by Category

df <- df_graphs 

#MAKER IDENTIFICATION
ggplot( df, aes( x = STIMULUS_CATEGORY, fill = ENCOUNTER)) +
  geom_bar(position = "fill") +
  facet_grid(Distribution ~ .) + 
  labs( title = "ENCOUNTER by Stimulus Category (grouped by CATEGORY)",
        subtitle = "", x = "") +
  scale_fill_brewer(palette = "Dark2") + 
  coord_flip()+
  theme_minimal()

3.6 ENGAGEMENT CHOICE

3.6.1 Actions Choice by Stimulus

df <- df_actions 

# ACTION CHOICE BY STIMULUS
ggplot(data = df, aes( fill = CHART_ACTION, x = fct_rev(STIMULUS) )) +
  geom_bar(position = "fill") +
  coord_flip() + 
  facet_grid(fct_rev(STIMULUS_CATEGORY) ~ Distribution, scales = "free", space = "free") + 
  scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
  scale_y_continuous(labels = scales::percent) + 

  labs( title = "Chart Action by Stimulus (grouped by Category)",
        subtitle = "", x = "") +
  easy_add_legend_title("ACTION") +
  theme_minimal() 

# ACTION CHOICE BY STIMULUS
ggplot(data = df, aes( fill = CHART_ACTION, x = fct_rev(STIMULUS) )) +
  geom_bar(position = "fill") +
  coord_flip() + 
  facet_grid(fct_rev(STIMULUS_CATEGORY) ~ ., scales = "free", space = "free") + 
  scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
  scale_y_continuous(labels = scales::percent) + 

  labs( title = "Chart Action by Stimulus (grouped by Category)",
        subtitle = "", x = "") +
  easy_add_legend_title("ACTION") +
  theme_minimal() 

3.6.2 Actions Choice by Category

df <- df_actions 

# ACTION CHOICE BY STIMULUS
ggplot(data = df, aes( fill = CHART_ACTION, x = STIMULUS_CATEGORY )) +
  geom_bar(position = "fill") +
  facet_grid(Distribution ~ .) + 
  coord_flip() + 
  scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
  scale_y_continuous(labels = scales::percent) + 
  labs( title = "CHART ACTION by Category",
        subtitle = "", x = "") +
  easy_add_legend_title("ACTION") +
  theme_minimal() 

3.7 CHART BEAUTY

3.7.1 Chart Beauty by Stimulus and Category

df <- df_graphs

qacBase::qstats(df, CHART_BEAUTY, STIMULUS) %>% arrange(mean)
##    STIMULUS   n  mean    sd
## 1      B5-1  53 27.23 22.54
## 2      B4-2  54 28.24 26.07
## 3      B3-1  52 28.56 22.96
## 4      B3-3  52 31.54 27.19
## 5      B2-2  52 31.73 26.11
## 6      B1-3  55 38.13 32.33
## 7      B6-4  52 40.12 29.57
## 8      B1-4  55 45.18 28.97
## 9      B1-2  55 46.09 25.28
## 10     B6-2  52 46.12 27.68
## 11     B5-2  53 49.17 22.59
## 12     B0-0 318 49.67 26.95
## 13     B5-3  53 50.19 27.54
## 14     B4-1  54 53.09 27.92
## 15     B2-1  52 55.13 25.80
## 16     B1-1  55 55.75 29.37
## 17     B6-1  52 56.06 24.03
## 18     B3-2  52 57.25 25.32
## 19     B5-4  53 57.81 26.61
## 20     B2-4  52 61.62 26.60
## 21     B4-4  54 61.70 28.12
## 22     B6-3  52 62.19 27.48
## 23     B3-4  52 63.60 27.31
## 24     B4-3  54 66.56 20.81
## 25     B2-3  52 74.23 24.85
qacBase::qstats(df, CHART_BEAUTY, STIMULUS_CATEGORY) %>% arrange(mean)
##   STIMULUS_CATEGORY   n  mean    sd
## 1                 B 318 43.05 27.26
## 2                 A 318 46.05 28.45
## 3                 F 318 49.67 26.95
## 4                 C 318 53.73 30.80
## 5                 D 318 54.96 29.11
# 
# 
# ggplot(df, aes(x = CHART_BEAUTY)) + 
#   geom_histogram(bins = 20) +
#   facet_grid(BLOCK ~ STIMULUS_CATEGORY, scales = "free", space = "free", drop=TRUE) + 
#   theme_minimal()
# 
# 
# ggplot(df, aes(x = CHART_BEAUTY)) + 
#   geom_histogram(bins = 20) +
#   facet_grid( ~ fct_rev(STIMULUS_CATEGORY), scales = "free", space = "free", drop=TRUE) + 
#   theme_minimal()
# 



##ggstatsplot BY CATEGORY
grouped_gghistostats(
  data = df_graphs %>% filter(STIMULUS != "B0-0"),
  x = CHART_BEAUTY, ## same outcome variable
  grouping.var = STIMULUS, ## grouping variable males = 1, females = 2
  type = "robust", ## robust test: one-sample percentile bootstrap
  test.value = 50, ## test value against which sample mean is to be compared
  centrality.line.args = list(color = "#D55E00", linetype = "dashed"),
  # ggtheme = ggthemes::theme_stata(), ## changing default theme
  ## turn off ggstatsplot theme layer
  ## arguments relevant for combine_plots
  annotation.args = list(
    title = "DISTRIBUTION of Chart Beauty by Cateogry",
    caption = ""
  ),
  plotgrid.args = list(nrow = 6)
)

#B0-0 
gghistostats(
  data = df_graphs %>% filter(STIMULUS == "B0-0"), ## data from which variable is to be taken
  x = CHART_BEAUTY, ## numeric variable
  xlab = "CHART BEAUTY", ## x-axis label
  title = "B0-0 MILLENIAL PINK PLANTS", ## title for the plot
  test.value = 50, ## test value
  caption = ""
)  

3.7.2 Chart Trust by Stimulus and Category

df <- df_graphs

qacBase::qstats(df, CHART_TRUST, STIMULUS) %>% arrange(mean)
##    STIMULUS   n  mean    sd
## 1      B1-3  55 30.98 27.33
## 2      B3-3  52 39.92 27.16
## 3      B1-4  55 45.45 23.37
## 4      B2-4  52 47.94 26.74
## 5      B5-3  53 49.15 24.83
## 6      B4-2  54 50.50 21.10
## 7      B0-0 318 50.79 20.05
## 8      B6-4  52 52.00 22.13
## 9      B5-4  53 53.68 21.04
## 10     B6-3  52 54.37 21.55
## 11     B2-2  52 54.52 22.15
## 12     B4-4  54 55.61 24.54
## 13     B6-1  52 56.52 19.00
## 14     B1-2  55 56.98 22.19
## 15     B6-2  52 57.50 21.89
## 16     B1-1  55 58.04 22.54
## 17     B3-2  52 58.52 18.56
## 18     B3-4  52 59.00 20.68
## 19     B5-1  53 61.77 23.68
## 20     B5-2  53 62.36 20.57
## 21     B2-1  52 62.44 20.45
## 22     B3-1  52 63.31 16.86
## 23     B2-3  52 66.00 22.10
## 24     B4-1  54 66.24 22.82
## 25     B4-3  54 72.48 19.94
qacBase::qstats(df, CHART_TRUST, STIMULUS_CATEGORY) %>% arrange(mean)
##   STIMULUS_CATEGORY   n  mean    sd
## 1                 F 318 50.79 20.05
## 2                 C 318 52.07 27.81
## 3                 D 318 52.24 23.45
## 4                 B 318 56.71 21.27
## 5                 A 318 61.39 21.15
##ggstatsplot BY CATEGORY
grouped_gghistostats(
  data = df_graphs %>% filter(STIMULUS != "B0-0"),
  x = CHART_TRUST, ## same outcome variable
  grouping.var = STIMULUS, ## grouping variable males = 1, females = 2
  type = "robust", ## robust test: one-sample percentile bootstrap
  test.value = 50, ## test value against which sample mean is to be compared
  centrality.line.args = list(color = "#D55E00", linetype = "dashed"),
  # ggtheme = ggthemes::theme_stata(), ## changing default theme
  ## turn off ggstatsplot theme layer
  ## arguments relevant for combine_plots
  annotation.args = list(
    title = "DISTRIBUTION of Chart TRUST by Cateogry",
    caption = ""
  ),
  plotgrid.args = list(nrow = 6)
)

#B0-0 
gghistostats(
  data = df_graphs %>% filter(STIMULUS == "B0-0"), ## data from which variable is to be taken
  x = CHART_TRUST, ## numeric variable
  xlab = "CHART TRUST", ## x-axis label
  title = "B0-0 MILLENIAL PINK PLANTS", ## title for the plot
  test.value = 50, ## test value
  caption = ""
)  

4 FISH

4.0.1 bivariate combinations

if(graph_render == TRUE){





### EXPLORE ALL QUESTIONS AS EXPLAINED BY DEMOGRAPHICS
df <- df_graphs 
predictors = df %>% select(Distribution, STIMULUS_CATEGORY,
                D_gender_collapsed, D_age, D_education, D_income,
                D_politicalParty, D_politicsFiscal, D_politicsSocial) %>% colnames()

questions = c(ref_sd_questions, ref_conf_questions, ref_cat_questions)

i=0
for (q in questions){
  i = i+1
  
  # setup dataframe 
  title <- paste(q)
  g <- ggbivariate(df, outcome = q, explanatory = predictors) + labs(
    title = paste0(q, " as explained by demographic variables")
  )+ theme_minimal()
  ggsave(plot = g, path="figs/bivariate/bivariate_demographics", filename =paste0(q,"_demo_ggbivariate.png"), units = c("in"), width = 10, height = 25)
}
rm(i,g)
#####################################################END




### EXPLORE ALL QUESTIONS AS EXPLAINED BY SD VARS
df <- df_graphs 
predictors = c(ref_sd_questions)
questions = c(ref_sd_questions, ref_conf_questions, ref_cat_questions)
i=0
for (q in questions){
  i = i+1
  
  # setup dataframe 
  title <- paste(q)
  g <- ggbivariate(df, outcome = q, explanatory = predictors) + labs(
    title = paste0(q, " as explained by SD variables")
  )+ theme_minimal()
  ggsave(plot = g, path="figs/bivariate/bivariate_SD", filename =paste0(q,"_sd_ggbivariate.png"), units = c("in"), width = 10, height = 25)
}
rm(i,g )
#####################################################END



### EXPLORE ALL QUESTIONS AS EXPLAINED BY CONF VARS
df <- df_graphs 
predictors = c(ref_conf_questions)
questions = c(ref_sd_questions, ref_conf_questions, ref_cat_questions)
i=0
for (q in questions){
  i = i+1
  
  # setup dataframe 
  title <- paste(q)
  g <- ggbivariate(df, outcome = q, explanatory = predictors) + labs(
    title = paste0(q, " as explained by CONF variables")
  )+ theme_minimal()
  ggsave(plot = g, path="figs/bivariate/bivariate_CONF", filename =paste0(q,"_conf_ggbivariate.png"), units = c("in"), width = 10, height = 25)
}
rm(i,g)
#####################################################END


### EXPLORE ALL QUESTIONS AS EXPLAINED BY CAT VARS
df <- df_graphs 
predictors = c(ref_cat_questions)
questions = c(ref_sd_questions, ref_conf_questions, ref_cat_questions)
i=0
for (q in questions){
  i = i+1
  
  # setup dataframe 
  title <- paste(q)
  g <- ggbivariate(df, outcome = q, explanatory = predictors) + labs(
    title = paste0(q, " as explained by CATEGORICAL variables")
  )+ theme_minimal()
  ggsave(plot = g, path="figs/bivariate/bivariate_CAT", filename =paste0(q,"_cat_ggbivariate.png"), units = c("in"), width = 10, height = 25)
}
rm(i,g)
#####################################################END


}

4.0.2 DEMO-GENDER > MAKER-GENDER?

df <- df_graphs %>% select(PID, D_gender, MAKER_GENDER, GENDER_CONF, STIMULUS, STIMULUS_CATEGORY, BLOCK)


## ALL STIMULI
ggplot(df, aes(x = MAKER_GENDER, fill= D_gender)) + 
  geom_bar(position = "stack")  + 
  scale_fill_viridis(discrete=TRUE, option="viridis") + 
  theme_minimal() + 
  labs(
    title = "MAKER GENDER by participant gender",
    subtitle = "(across all stimuli)",
    caption = "ARF: Attribution of maker gender does not *appear* to differ by participant \n gender identity; Each personal gender has attributions to each maker gender"
  )

## CATEGORY
ggplot(df, aes(x = MAKER_GENDER, fill= D_gender)) + 
  geom_bar(position = "stack")  + 
  facet_grid(.~fct_rev(STIMULUS_CATEGORY))+
  scale_fill_viridis(discrete=TRUE, option="viridis") + 
  theme_minimal() + 
  labs(
    title = "MAKER GENDER by participant gender",
    subtitle = "(by stimulus category)",
    caption = "ARF: Note that across categories most attributions are male, 
    EXCEPT F (millenial pink plants) which is largely female
    HOWEVER relative proportion by participant gender looks consistent" 
  )

## STIMULUS
ggplot(df, aes(x = MAKER_GENDER, fill= D_gender)) + 
  geom_bar(position = "stack")  + 
  facet_grid(BLOCK~fct_rev(STIMULUS_CATEGORY), scales = "free", space = "free")+
  scale_fill_viridis(discrete=TRUE, option="viridis")+
  theme_minimal() + 
  labs(
    title = "MAKER GENDER by participant gender",
    subtitle = "(by stimulus)",
    caption = "ARF: Again, relative proportions look consistent by participant gender"
  )

## B00
df <- df %>% filter(STIMULUS=="B0-0")
ggplot(df, aes(x = MAKER_GENDER, fill= D_gender)) + 
  geom_bar(position = "stack")  + 
  facet_grid(BLOCK~fct_rev(STIMULUS_CATEGORY), scales = "free", space = "free")+
  scale_fill_viridis(discrete=TRUE, option="viridis")+
  theme_minimal() + 
  labs(
    title = "MAKER GENDER by participant gender",
   subtitle = "B0-0 MILLENIAL PINK PLANTS",
    caption = "ARF: Most attributions are FEMALE but relative proportion of 
    participant gender appears comparable; difficult to detect due to low number
    of non-binary, prefer not to say, prefer to self-describe"
  )

### DEMO-GENDER_Q_all_stimuli

##ONLY RENDER GRAPHS IF SET TO TRUE
if(graph_render == TRUE){

####################CREATE SD PLOTS FOR EACH QUESTION#####################

#set questions to be graphed 
questions <- ref_sd_questions  #created in wrangling block
box_s_question <- list()
rain_s_question <- list()
i = 0
for (q in questions){
  i = i+1
  
  # setup dataframe 
  title <- paste(q)
  df <- df_sd_questions_wide %>% filter(QUESTION == q)

  #subset data cols 
  c <- df %>% select( all_of(ref_stimuli))
  # plot_sd = function (data, column, type, facet, facet_by,  boxplot) {

  plots <- as.list(lapply(colnames(c), plot_sd, data = df, type = "Q", facet=TRUE, facet_by= "D_gender_collapsed", boxplot=TRUE)) 
  
  #aggregate stimulus plots into block for question
  x <- plots[[1]] / plots[[2]]  / plots[[3]] / plots[[4]] / plots[[5]] / plots[[6]] / plots[[7]] /
        plots[[8]] /plots[[9]] /plots[[10]] /plots[[11]] /  plots[[12]] / plots[[13]] / plots[[14]] / 
        plots[[15]] /plots[[16]] /plots[[17]] /plots[[18]] /  plots[[19]] / plots[[20]] / plots[[21]] /
        plots[[22]] /plots[[23]] /plots[[24]] /plots[[25]] + 
      plot_annotation(
        title = title,
       subtitle =""
      )
  box_s_question[[i]] <- x 
  ggsave(plot = x, path="figs/GENDER_by_q_for_all_stimuli", filename =paste0(q,"_box.png"), units = c("in"), width = 10, height = 26)

  
  
  
  ############# RAINCLOUD
  # setup dataframe 
  title <- paste(q)
  df <- df_sd_questions_long %>% filter(QUESTION ==q)
    #select(1:6, QUESTION, all_of(s)) %>% filter(!is.na(s)) %>%  mutate(value = get(s))
  left <- ref_labels[q,]$left
  right <- ref_labels[q,]$right

  #RAINCLOUD PLOT
  x <- ggplot(df, aes(y = fct_rev(STIMULUS) , x = value, fill = fct_rev(STIMULUS))) +
      stat_slab(normalize="groups", scale = 0.7) +
      stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA) +
    facet_grid(~D_gender_collapsed)+
  guides(
    y = guide_axis_manual(labels = left),
    y.sec = guide_axis_manual(labels = right)
  ) +
  labs (title = title) +
  theme_minimal() 
   
  
  rain_s_question[[i]] <- x
  ggsave(plot = x, path="figs/GENDER_by_q_for_all_stimuli", filename =paste0(q,"_rain.png"), units = c("in"), width = 10, height = 14  )
  

  
}

#GIVE NAMES TO LIST
#NOW CAN ACCESS plots by plots_s_question$MAKER_DESIGN
#ALSO plots_s_question$MAKER_DESIGN[[1]]
names(box_s_question) <- questions
names(rain_s_question) <- questions
rm(x, i, plots, left, right)

#############################################################################

}

4.0.3 DEMO-GENDER-MILLENIAL-PLANTS-SDS

#################### ALL QUESTIONS AT B00 #####################
# ONE PLOT FOR EACH STIMULUS WITH ALL QUESTIONS

#set stimuli to be graphed 
# stimuli <- ref_stimuli #created in wrangling block
stimuli <- c("B0-0")

box_stimuli <- list()
rain_stimuli <- list()
i = 0
for (s in stimuli){
  i = i+1
  
  # setup dataframe 
  title <- df_stimuli %>% filter(ID == s) %>% select(NAME) 
  title <- paste(s,"|",title)
  df <- df_graphs %>% filter(STIMULUS== s)

  #subset data cols 
  cols <- df %>% select( all_of(ref_sd_questions))
  plots <- as.list(lapply(colnames(cols), plot_sd, data = df, type ="S", facet = TRUE, facet_by="D_gender_collapsed", boxplot=TRUE))

  #aggregate q plots into one for stimulus 
  x <- plots[[1]] / plots[[2]] / plots[[3]] / plots[[4]] / plots[[5]] / plots[[6]] / plots[[7]] /
   plots[[8]] /plots[[9]] /plots[[10]] /plots[[11]] + 
   plot_annotation(
     title = title,
     subtitle =""
   )
  
  box_stimuli[[i]] <- x
  ggsave(plot = x, path="figs/_millenial-pink-plants", filename =paste0(s,"_box.png"), units = c("in"), width = 10, height = 14  )
  
  
  #### RAINCLOUD PLOT
  # setup dataframe 
  title <- df_stimuli %>% filter(ID == s) %>% select(NAME) 
  title <- paste(s,"|",title)
  df <- df_sd_questions_wide %>% select(1:6, D_gender_collapsed, QUESTION, all_of(s)) %>% filter(!is.na(s)) %>%  mutate(value = get(s))


  #RAINCLOUD PLOT
  x <- ggplot(df, aes(y = fct_rev(QUESTION), x = value, fill = fct_rev(QUESTION))) +
  stat_slab(aes(thickness = after_stat(pdf*n)), scale = 0.7) +
  stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA) +
  facet_grid(~ D_gender_collapsed)+
  guides(
    # y = guide_axis_manual(labels = paste(levels(df$QUESTION),ref_labels$left)),
    y = guide_axis_manual(labels = rev(ref_labels$left)),
    y.sec = guide_axis_manual(labels = rev(ref_labels$right))
  ) + 
  labs (title = title) +
  theme_minimal() 
   
  
  rain_stimuli[[i]] <- x
  ggsave(plot = x, path="figs/_millenial-pink-plants", filename =paste0(s,"_rain.png"), units = c("in"), width = 10, height = 14  )
  
}

#GIVE NAMES TO LIST
#NOW CAN ACCESS plots by plots_stimulus$`B1-1`
#ALSO plots_stimulus$`B1-1`[[1]]
names(box_stimuli) <- stimuli
names(rain_stimuli) <- stimuli
rm(x, i, plots)

#############################################################################

4.0.4 DEMO-GENDER-MILLENIAL-PLANTS-CATS

#filter dataframe
df <- df_graphs %>% select(PID, D_gender_collapsed, MAKER_GENDER, GENDER_CONF, MAKER_AGE, AGE_CONF, MAKER_ID, MAKER_CONF, STIMULUS, STIMULUS_CATEGORY, BLOCK) %>% filter(STIMULUS=="B0-0")


##ID
p <- ggplot(df, aes(x = MAKER_ID, fill= D_gender_collapsed)) + 
  geom_bar(position = "fill")  + 
  facet_grid(BLOCK~fct_rev(STIMULUS_CATEGORY), scales = "free", space = "free")+
  theme_minimal() + 
  labs(
    title = "MAKER ID by participant gender (proportion)",
    subtitle = "B0-0 MILLENIAL PINK PLANTS",
    caption = ""
  )

s <- ggplot(df, aes(x = MAKER_ID, fill= D_gender_collapsed)) + 
  geom_bar(position = "stack")  + 
  facet_grid(BLOCK~fct_rev(STIMULUS_CATEGORY), scales = "free", space = "free")+
  theme_minimal() + 
  labs(
    title = "MAKER ID by participant gender (true count)",
    subtitle = "B0-0 MILLENIAL PINK PLANTS",
    caption = ""
  )

(p/s)

##AGE
p <- ggplot(df, aes(x = MAKER_AGE, fill= D_gender_collapsed)) + 
  geom_bar(position = "fill")  + 
  facet_grid(BLOCK~fct_rev(STIMULUS_CATEGORY), scales = "free", space = "free")+
  theme_minimal() + 
  labs(
    title = "MAKER AGE by participant gender (proportion)",
    subtitle = "B0-0 MILLENIAL PINK PLANTS",
    caption = ""
  )

s <- ggplot(df, aes(x = MAKER_AGE, fill= D_gender_collapsed)) + 
  geom_bar(position = "stack")  + 
  facet_grid(BLOCK~fct_rev(STIMULUS_CATEGORY), scales = "free", space = "free")+
  theme_minimal() + 
  labs(
    title = "MAKER AGE by participant gender (true count)",
    subtitle = "B0-0 MILLENIAL PINK PLANTS",
    caption = ""
  )

(p/s)

##GENDER
p <- ggplot(df, aes(x = MAKER_GENDER, fill= D_gender_collapsed)) + 
  geom_bar(position = "fill")  + 
  facet_grid(BLOCK~fct_rev(STIMULUS_CATEGORY), scales = "free", space = "free")+
  theme_minimal() + 
  labs(
    title = "MAKER GENDER by participant gender (proportion)",
    subtitle = "B0-0 MILLENIAL PINK PLANTS",
    caption = "ARF: Proportion of OTHER looks high!"
  )

s <- ggplot(df, aes(x = MAKER_GENDER, fill= D_gender_collapsed)) + 
  geom_bar(position = "stack")  + 
  facet_grid(BLOCK~fct_rev(STIMULUS_CATEGORY), scales = "free", space = "free")+
  theme_minimal() + 
  labs(
    title = "MAKER GENDER by participant gender (true count)",
    subtitle = "B0-0 MILLENIAL PINK PLANTS",
    caption = "ARF: But count of other is very low :("
  )

(p/s)

##MAKER CONFIDENCE BY GENDER 
ggplot(df, aes(x=D_gender_collapsed, y = MAKER_CONF, color = D_gender_collapsed))+
  geom_jitter(width=0.2, height = 0) + 
  labs (title = "CONFIDENCE in MAKER_ID by Participant Gender",
        x = "participant gender")+
  theme_minimal() +
  easy_remove_legend()

##AGE CONFIDENCE BY GENDER 
ggplot(df, aes(x=D_gender_collapsed, y = AGE_CONF, color = D_gender_collapsed))+
  geom_jitter(width=0.2, height = 0) + 
  labs (title = "CONFIDENCE in MAKER_AGE by Participant Gender",
        x = "participant gender")+
  theme_minimal() +
  easy_remove_legend()

##GENDER CONFIDENCE BY GENDER 
ggplot(df, aes(x=D_gender_collapsed, y = GENDER_CONF, color = D_gender_collapsed))+
  geom_jitter(width=0.2, height = 0) + 
  labs (title = "CONFIDENCE in MAKER_GENDER by Participant Gender",
        x = "participant gender")+
  theme_minimal() +
  easy_remove_legend()

5 B1-3 OMINOUS AMERICAN FLAG

df <- df_graphs %>% filter(STIMULUS == "B1-3")

#MAKER IDENTIFICATION
ggplot( df, aes( x = fct_rev(STIMULUS), fill = MAKER_ID)) +
  geom_bar(position = "fill") +
  labs( title = "MAKER ID [B1-C]) ",
        subtitle = "", x = "") +
  scale_fill_viridis(discrete=TRUE, option="viridis") +
  easy_add_legend_title("") +
  theme_minimal()

#MAKER AGE
ggplot( df, aes( x = fct_rev(STIMULUS), fill = MAKER_AGE)) +
  geom_bar(position = "fill") +
  labs( title = "MAKER AGE [B1-C] ",
        subtitle = "", x = "") +
  scale_fill_paletteer_d("ggsci::light_uchicago", direction = -1)+
  theme_minimal()

#MAKER GENDER
ggplot( df, aes( x = fct_rev(STIMULUS), fill = MAKER_GENDER)) +
  geom_bar(position = "fill") +
  # facet_grid(D_gender_collapsed ~ ., scales = "free", space = "free") + 
  labs( title = "MAKER GENDER [B1-C] ",
        subtitle = "", x = "") +
  scale_fill_paletteer_d("ggthemes::excel_Celestial", direction = 1)+
  theme_minimal()

df <- df_tools %>% filter (STIMULUS =="B1-3")

# TOOL CHOICE BY STIMULUS
ggplot(data = df, aes( fill = fct_rev(TOOL_ID), x = fct_rev(STIMULUS) )) +
  geom_bar(position = "fill") +
  # coord_flip() + 
  # facet_grid(fct_rev(STIMULUS_CATEGORY) ~ Distribution, scales = "free", space = "free") + 
  scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
  scale_y_continuous(labels = scales::percent) + 
  labs( title = "TOOL ID by Stimulus (grouped by Category)",
        subtitle = "", x = "") +
  easy_add_legend_title("TOOL_ID") +
  theme_minimal() 

6 B0-0 MILLENIAL PINK PLANTS

6.0.1 GGBIVARIATE

if(graph_render == TRUE){


### EXPLORE ALL QUESTIONS AS EXPLAINED BY DEMOGRAPHICS
df <- df_graphs %>% filter(STIMULUS=="B0-0")
predictors = df %>% select(Distribution, STIMULUS_CATEGORY,
                D_gender_collapsed, D_age, D_education, D_income,
                D_politicalParty, D_politicsFiscal, D_politicsSocial) %>% colnames()

questions = c(ref_sd_questions, ref_conf_questions, ref_cat_questions)

i=0
for (q in questions){
  i = i+1
  title <- paste(q)
  g <- ggbivariate(df, outcome = q, explanatory = predictors) + labs(
    title = paste0("B0-0 ",q, " as explained by demographic variables")
  )+ theme_minimal()
  ggsave(plot = g, path="figs/_millenial-pink-plants/bivariate/bivariate_demographics", filename =paste0("B0-0_",q,"_demo_ggbivariate.png"), units = c("in"), width = 10, height = 25)
}
rm(i,g)
#####################################################END




### EXPLORE ALL QUESTIONS AS EXPLAINED BY SD VARS
df <- df_graphs %>% filter(STIMULUS=="B0-0")
predictors = c(ref_sd_questions)
questions = c(ref_sd_questions, ref_conf_questions, ref_cat_questions)
i=0
for (q in questions){
  i = i+1
  
  # setup dataframe 
  title <- paste(q)
  g <- ggbivariate(df, outcome = q, explanatory = predictors) + labs(
    title = paste0("B0-0 ",q, " as explained by SD variables")
  )+ theme_minimal()
  ggsave(plot = g, path="figs/_millenial-pink-plants/bivariate/bivariate_SD", filename =paste0("B0-0_",q,"_sd_ggbivariate.png"), units = c("in"), width = 10, height = 25)
}
rm(i,g )
#####################################################END



### EXPLORE ALL QUESTIONS AS EXPLAINED BY CONF VARS
df <- df_graphs %>% filter(STIMULUS=="B0-0")
predictors = c(ref_conf_questions)
questions = c(ref_sd_questions, ref_conf_questions, ref_cat_questions)
i=0
for (q in questions){
  i = i+1
  
  # setup dataframe 
  title <- paste(q)
  g <- ggbivariate(df, outcome = q, explanatory = predictors) + labs(
    title = paste0("B0-0 ",q, " as explained by CONF variables")
  )+ theme_minimal()
  ggsave(plot = g, path="figs/_millenial-pink-plants/bivariate/bivariate_CONF", filename =paste0("B0-0_",q,"_conf_ggbivariate.png"), units = c("in"), width = 10, height = 25)
}
rm(i,g)
#####################################################END


### EXPLORE ALL QUESTIONS AS EXPLAINED BY CAT VARS
df <- df_graphs %>% filter(STIMULUS=="B0-0")
predictors = c(ref_cat_questions)
questions = c(ref_sd_questions, ref_conf_questions, ref_cat_questions)
i=0
for (q in questions){
  i = i+1
  
  # setup dataframe 
  title <- paste(q)
  g <- ggbivariate(df, outcome = q, explanatory = predictors) + labs(
    title = paste0("B0-0 ",q, " as explained by CATEGORICAL variables")
  )+ theme_minimal()
  ggsave(plot = g, path="figs/_millenial-pink-plants/bivariate/bivariate_CAT", filename =paste0("B0-0_",q,"_cat_ggbivariate.png"), units = c("in"), width = 10, height = 25)
}
rm(i,g)
#####################################################END

}

6.1 PAIRPLOTS

if(graph_render == TRUE){


#####################################################PAIRPLOT_SD
df <- df_graphs %>% filter(STIMULUS=="B0-0") %>% select(all_of(ref_sd_questions))
g <- ggpairs(df) 
ggsave(plot = g, path="figs/_millenial-pink-plants", filename =paste0("B00","_sd_pairplot.png"), units = c("in"), width = 20, height = 20  )
g <- ggduo(df)
ggsave(plot = g, path="figs/_millenial-pink-plants", filename =paste0("B00","_sd_duoplot.png"), units = c("in"), width = 20, height = 20  )

#####################################################PAIRPLOT_CAT
df <- df_graphs %>% filter(STIMULUS=="B0-0") %>% select(all_of(ref_cat_questions))
g <- ggpairs(df) 
ggsave(plot = g, path="figs/_millenial-pink-plants", filename =paste0("B00","_cat_pairplot.png"), units = c("in"), width = 20, height = 20  )
g <- ggduo(df)
ggsave(plot = g, path="figs/_millenial-pink-plants", filename =paste0("B00","_cat_duoplot.png"), units = c("in"), width = 20, height = 20  )


#####################################################PAIRPLOT_CONF
df <- df_graphs %>% filter(STIMULUS=="B0-0") %>% select(all_of(ref_conf_questions))
g <- ggpairs(df) 
ggsave(plot = g, path="figs/_millenial-pink-plants", filename =paste0("B00","_conf_pairplot.png"), units = c("in"), width = 20, height = 20  )
g <- ggduo(df)
ggsave(plot = g, path="figs/_millenial-pink-plants", filename =paste0("B00","_conf_duoplot.png"), units = c("in"), width = 20, height = 20  )

####################################################################################################
####################################################################################################






#####################################################PAIRPLOT_SD COLORED BY DEMO
predictors = df_graphs %>% select(Distribution, STIMULUS_CATEGORY,
                D_gender_collapsed, D_age, D_education, D_income,
                D_politicalParty) %>% colnames()

i=0
for (p in predictors){
  df <- df_graphs %>% filter(STIMULUS=="B0-0") %>% select(p, all_of(ref_sd_questions))
  g <- ggpairs(df, aes(color = get(p)))   
  ggsave(plot = g, path="figs/_millenial-pink-plants/colored_pairs", filename =paste0("B00_",p,"_sd_pairplot.png"), units = c("in"), width = 40, height = 40  )  
  g <- ggduo(df, aes(color = get(p)))   
  ggsave(plot = g, path="figs/_millenial-pink-plants/colored_duos", filename =paste0("B00_",p,"_sd_duoplot.png"), units = c("in"), width = 40, height = 40  )  
  
}
rm(i,g,df)
####################################################################################################



}

6.2 MODELLING

6.2.1 Modelling Trust

df <- df_graphs %>% filter(STIMULUS == "B0-0") %>%
  mutate(
    flipped_data = abs(MAKER_DATA - 100),
    flipped_design = abs(MAKER_DESIGN -100),
    flipped_intent = abs(CHART_INTENT -100),
    flipped_self = abs(MAKER_SELF -100)
  ) %>% select(
    flipped_data:flipped_self, MAKER_ARGUE, MAKER_ALIGN, CHART_LIKE, CHART_BEAUTY, CHART_TRUST, MAKER_TRUST
  )


chart_m.1 <- lm(CHART_TRUST ~ CHART_BEAUTY, data = df )
summ(chart_m.1)
Observations 318
Dependent variable CHART_TRUST
Type OLS linear regression
F(1,316) 153.65
0.33
Adj. R² 0.33
Est. S.E. t val. p
(Intercept) 29.65 1.94 15.29 0.00
CHART_BEAUTY 0.43 0.03 12.40 0.00
Standard errors: OLS
chart_m.2 <- lm(CHART_TRUST ~ MAKER_ALIGN , data = df )
summ(chart_m.2)
Observations 318
Dependent variable CHART_TRUST
Type OLS linear regression
F(1,316) 60.44
0.16
Adj. R² 0.16
Est. S.E. t val. p
(Intercept) 21.52 3.90 5.51 0.00
MAKER_ALIGN 0.47 0.06 7.77 0.00
Standard errors: OLS
anova(chart_m.1, chart_m.2)
## Analysis of Variance Table
## 
## Model 1: CHART_TRUST ~ CHART_BEAUTY
## Model 2: CHART_TRUST ~ MAKER_ALIGN
##   Res.Df    RSS Df Sum of Sq F Pr(>F)
## 1    316  85711                      
## 2    316 106934  0    -21223
compare_performance(chart_m.1, chart_m.2, rank = TRUE)
## # Comparison of Model Performance Indices
## 
## Name      | Model |    R2 | R2 (adj.) |   RMSE |  Sigma | AIC weights | AICc weights | BIC weights | Performance-Score
## ----------------------------------------------------------------------------------------------------------------------
## chart_m.1 |    lm | 0.327 |     0.325 | 16.417 | 16.469 |       1.000 |        1.000 |       1.000 |           100.00%
## chart_m.2 |    lm | 0.161 |     0.158 | 18.338 | 18.396 |    5.29e-16 |     5.29e-16 |    5.29e-16 |             0.00%
m.1 <- lm(MAKER_TRUST ~ CHART_BEAUTY, data = df )
summ(m.1)
Observations 318
Dependent variable MAKER_TRUST
Type OLS linear regression
F(1,316) 62.76
0.17
Adj. R² 0.16
Est. S.E. t val. p
(Intercept) 48.24 1.83 26.36 0.00
CHART_BEAUTY 0.26 0.03 7.92 0.00
Standard errors: OLS
m.2 <- lm(MAKER_TRUST ~ MAKER_ALIGN , data = df )
summ(m.2)
Observations 318
Dependent variable MAKER_TRUST
Type OLS linear regression
F(1,316) 125.33
0.28
Adj. R² 0.28
Est. S.E. t val. p
(Intercept) 28.00 3.05 9.17 0.00
MAKER_ALIGN 0.53 0.05 11.20 0.00
Standard errors: OLS
anova(m.1, m.2)
## Analysis of Variance Table
## 
## Model 1: MAKER_TRUST ~ CHART_BEAUTY
## Model 2: MAKER_TRUST ~ MAKER_ALIGN
##   Res.Df   RSS Df Sum of Sq F Pr(>F)
## 1    316 76327                      
## 2    316 65505  0     10822
compare_performance(chart_m.1, chart_m.2, m.1, m.2, rank = TRUE)
## When comparing models, please note that probably not all models were fit
##   from same data.
## # Comparison of Model Performance Indices
## 
## Name      | Model |    R2 | R2 (adj.) |   RMSE |  Sigma | AIC weights | AICc weights | BIC weights | Performance-Score
## ----------------------------------------------------------------------------------------------------------------------
## m.2       |    lm | 0.284 |     0.282 | 14.352 | 14.398 |       1.000 |        1.000 |       1.000 |            92.60%
## chart_m.1 |    lm | 0.327 |     0.325 | 16.417 | 16.469 |    2.72e-19 |     2.72e-19 |    2.72e-19 |            42.34%
## m.1       |    lm | 0.166 |     0.163 | 15.493 | 15.542 |    2.77e-11 |     2.77e-11 |    2.77e-11 |            21.28%
## chart_m.2 |    lm | 0.161 |     0.158 | 18.338 | 18.396 |    1.44e-34 |     1.44e-34 |    1.44e-34 |             0.00%
m.3 <- lm(CHART_TRUST ~ MAKER_TRUST + MAKER_ALIGN + CHART_BEAUTY , data = df )
summ(m.3)
Observations 318
Dependent variable CHART_TRUST
Type OLS linear regression
F(3,314) 116.37
0.53
Adj. R² 0.52
Est. S.E. t val. p
(Intercept) 0.73 3.31 0.22 0.82
MAKER_TRUST 0.56 0.06 9.80 0.00
MAKER_ALIGN 0.04 0.05 0.74 0.46
CHART_BEAUTY 0.27 0.03 8.62 0.00
Standard errors: OLS
m.4 <- lm(CHART_TRUST ~ MAKER_TRUST + MAKER_ALIGN + CHART_BEAUTY + flipped_intent, data = df )
summ(m.4)
Observations 318
Dependent variable CHART_TRUST
Type OLS linear regression
F(4,313) 93.69
0.54
Adj. R² 0.54
Est. S.E. t val. p
(Intercept) -0.62 3.27 -0.19 0.85
MAKER_TRUST 0.50 0.06 8.75 0.00
MAKER_ALIGN 0.02 0.05 0.42 0.67
CHART_BEAUTY 0.28 0.03 9.04 0.00
flipped_intent 0.10 0.03 3.56 0.00
Standard errors: OLS
m.5 <- lm(CHART_TRUST ~ MAKER_TRUST + MAKER_ALIGN + CHART_BEAUTY + flipped_intent + flipped_data, data = df )
summ(m.5)
Observations 318
Dependent variable CHART_TRUST
Type OLS linear regression
F(5,312) 75.05
0.55
Adj. R² 0.54
Est. S.E. t val. p
(Intercept) -0.94 3.30 -0.29 0.78
MAKER_TRUST 0.50 0.06 8.47 0.00
MAKER_ALIGN 0.02 0.05 0.43 0.67
CHART_BEAUTY 0.28 0.03 8.64 0.00
flipped_intent 0.09 0.03 3.42 0.00
flipped_data 0.03 0.03 0.87 0.38
Standard errors: OLS
m.6 <- lm(MAKER_TRUST ~ CHART_BEAUTY + MAKER_ALIGN + flipped_intent + flipped_data + flipped_design, data = df )
summ(m.6)
Observations 318
Dependent variable MAKER_TRUST
Type OLS linear regression
F(5,312) 43.04
0.41
Adj. R² 0.40
Est. S.E. t val. p
(Intercept) 19.61 3.07 6.38 0.00
CHART_BEAUTY 0.14 0.03 4.36 0.00
MAKER_ALIGN 0.39 0.05 8.22 0.00
flipped_intent 0.11 0.03 4.07 0.00
flipped_data 0.10 0.03 2.79 0.01
flipped_design 0.01 0.03 0.28 0.78
Standard errors: OLS
m.7 <- lm(CHART_TRUST ~ CHART_BEAUTY + MAKER_ALIGN + flipped_intent + flipped_data + flipped_design, data = df )
summ(m.7)
Observations 318
Dependent variable CHART_TRUST
Type OLS linear regression
F(5,312) 49.52
0.44
Adj. R² 0.43
Est. S.E. t val. p
(Intercept) 9.44 3.52 2.68 0.01
CHART_BEAUTY 0.36 0.04 9.73 0.00
MAKER_ALIGN 0.21 0.05 3.94 0.00
flipped_intent 0.14 0.03 4.75 0.00
flipped_data 0.09 0.04 2.24 0.03
flipped_design -0.03 0.04 -0.66 0.51
Standard errors: OLS
m <- lm(MAKER_TRUST ~ MAKER_ALIGN, data = df)
summ(m)
Observations 318
Dependent variable MAKER_TRUST
Type OLS linear regression
F(1,316) 125.33
0.28
Adj. R² 0.28
Est. S.E. t val. p
(Intercept) 28.00 3.05 9.17 0.00
MAKER_ALIGN 0.53 0.05 11.20 0.00
Standard errors: OLS
 compare_performance(m.1, m.2, m.3, m.4, m.5, m, rank = TRUE)
## When comparing models, please note that probably not all models were fit
##   from same data.
## # Comparison of Model Performance Indices
## 
## Name | Model |    R2 | R2 (adj.) |   RMSE |  Sigma | AIC weights | AICc weights | BIC weights | Performance-Score
## -----------------------------------------------------------------------------------------------------------------
## m.4  |    lm | 0.545 |     0.539 | 13.502 | 13.610 |       0.646 |        0.657 |       0.897 |            99.84%
## m.5  |    lm | 0.546 |     0.539 | 13.486 | 13.615 |       0.350 |        0.340 |       0.074 |            73.41%
## m.3  |    lm | 0.526 |     0.522 | 13.773 | 13.860 |       0.003 |        0.003 |       0.029 |            52.47%
## m.2  |    lm | 0.284 |     0.282 | 14.352 | 14.398 |    4.78e-08 |     5.35e-08 |    1.87e-05 |            25.53%
## m    |    lm | 0.284 |     0.282 | 14.352 | 14.398 |    4.78e-08 |     5.35e-08 |    1.87e-05 |            25.53%
## m.1  |    lm | 0.166 |     0.163 | 15.493 | 15.542 |    1.32e-18 |     1.48e-18 |    5.17e-16 |             0.00%
# 
# ## WHAT PREDICTS MAKER_TRUST?
# 
# 
# ## PLOT THE DATA 
# ggplot(df, aes(x = MAKER_ALIGN, y = MAKER_TRUST)) + 
#   geom_point() +
#   stat_smooth(method = "lm", 
#               formula = y ~ x, 
#               geom = "smooth") + 
#   labs(title = "MAKER ALIGNMENT PREDICTS TRUST?")+
#   theme_minimal()
# 
# 
# ## PLOT THE DATA 
# ggplot(df, aes(x = flipped_data, y = MAKER_TRUST)) + 
#   geom_point() +
#   stat_smooth(method = "lm", 
#               formula = y ~ x, 
#               geom = "smooth") + 
#   labs(title = "MAKER DATA PREDICTS TRUST?")+
#   theme_minimal()
# 
# 
# ## BUILD MODEL
# m.1 <- lm( MAKER_TRUST ~ MAKER_ALIGN, data = df)
# # summary(m.1)
# jtools::summ(m.1, confint = TRUE)
# # check_model(m.1)
# # report(m.1)
# 
# ## BUILD MODEL
# m.2 <- lm( MAKER_TRUST ~ MAKER_ALIGN + flipped_data, data = df)
# # summary(m.2)
# jtools::summ(m.2, confint = TRUE)
# # check_model(m.2)
# # report(m.2)
# 
# ## BUILD MODEL
# m.3 <- lm( MAKER_TRUST ~ MAKER_ALIGN + flipped_data + flipped_design , data = df)
# # summary(m.3)
# jtools::summ(m.3, confint = TRUE)
# # check_model(m.3)
# # report(m.3)
# 
# 
# ## BUILD MODEL
# m.4 <- lm( MAKER_TRUST ~ MAKER_ALIGN + flipped_data + flipped_design + MAKER_POLITIC, data = df)
# # summary(m.3)
# jtools::summ(m.4, confint = TRUE)
# # check_model(m.3)
# # report(m.3)
# 
# 
# 
#   
# compare_performance(m.1, m.2, m.3, m.4, m.B,  rank = TRUE)
# 
# 
# # effect_plot(m.2, pred=MAKER_DATA, rug = TRUE, plot.points = TRUE) + 
# #   xlim(0,100) + 
# #   ylim(0,100)
# 
# 
# 
# 
# df <- df_graphs %>% filter(STIMULUS != "B0-0") %>% 
#   select(PID, STIMULUS, 
#          MAKER_ID:MAKER_TRUST, CHART_LIKE:CHART_TRUST, 
#          D_politicsFiscal, D_politicsSocial)
# 
# m.pid <- lmer( MAKER_TRUST ~  (1 | PID) , data = df, REML = FALSE)
# m.stim <- lmer( MAKER_TRUST ~ (1|STIMULUS) , data = df, REML = FALSE)
# m.r <- lmer( MAKER_TRUST ~ (1 | PID) + (1|STIMULUS), data = df, REML = FALSE)
# 
# compare_performance (m.pid, m.stim, m.r, rank = TRUE)
# 
# 
# 
# m.1 <- update( m.r, .~. + CHART_BEAUTY)
# summ(m.1)
# 
# m.2 = update(m.1, . ~ . + CHART_LIKE)
# summ(m.2)
# 
# 
# m.3 = update(m.2, . ~ . + CHART_TRUST)
# summ(m.3)
# 
# 
# compare_performance(m.r, m.1, m.2, m.3, rank = TRUE)

6.2.2 TODO COME BACK HERE Modelling Engagement

# df <- df_graphs %>% filter(STIMULUS == "B0-0") %>%
#   mutate(
#     flipped_data = abs(MAKER_DATA - 100),
#     flipped_design = abs(MAKER_DESIGN -100),
#     flipped_intent = abs(CHART_INTENT -100),
#     flipped_self = abs(MAKER_SELF -100),
#   ) %>% select(
#     MAKER_ARGUE, MAKER_ALIGN, CHART_LIKE, CHART_BEAUTY,
#     flipped_data:flipped_self,  CHART_TRUST, MAKER_TRUST, 
#     Distribution, D_gender, D_education, D_politicalParty, 
#     D_politicsSocial, D_politicsFiscal,
#     MAKER_ID, MAKER_AGE, MAKER_GENDER, 
#   )


## left off here !!! 
## MAKE DATAFRAME OF ACTIONS WITH OTHER STUFF 
# df_g <- df_graphs %>% filter(STIMULUS == "B0-0")
# df_a <- df_actions %>% filter(STIMULUS == "B0-0")
# df_x <- df_a %>% inner_join(x=df_a, y=df_g, by="PID",
#                            relationship = "many-to-one",
#                            multiple = "last",
#                            keep = FALSE)
# rm(df_a, df_g)
# 
# 
# df <- df_x %>% filter(STIMULUS == "B0-0") %>%
#   mutate(
#     flipped_data = abs(MAKER_DATA - 100),
#     flipped_design = abs(MAKER_DESIGN -100),
#     flipped_intent = abs(CHART_INTENT -100),
#     flipped_self = abs(MAKER_SELF -100),
#   ) %>% select(
#     MAKER_ARGUE, MAKER_ALIGN, CHART_LIKE, CHART_BEAUTY,
#     flipped_data:flipped_self,  CHART_TRUST, MAKER_TRUST,
#     Distribution, D_gender, D_education, D_politicalParty,
#     D_politicsSocial, D_politicsFiscal,
#     MAKER_ID, MAKER_AGE, MAKER_GENDER,
#   )
# 


## PLOT DISTRIBUTION OF ENGAGEMENT 
# 
# (a <- ggbarstats( data = df, x = "REV_CHART_ACTION", y = "STIMULUS",
#                  legend.title = "ENGAGEMENT")  + 
#       scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
#       labs( title = "Chart Engagement ", subtitle = "", x = "") )
#   
#         
# 
# (b <- grouped_ggbarstats( data = df_a, x = "REV_CHART_ACTION", y = "STIMULUS", 
#                     grouping.var = Distribution, 
#                     legend.title = "ENGAGEMENT",  
#                     ggplot.component = list(scale_fill_paletteer_d("awtools::a_palette", direction = 1))))
# 
# 
# 
# df <- df_a %>% mutate( REV_CHART_ACTION = fct_rev(CHART_ACTION))
# predictors = df_a %>% select(Distribution, PLATFORM, 
#                 D_gender, D_age, D_education, D_race, D_income, 
#                 D_politicalParty, D_politicsFiscal, D_politicsSocial) %>% colnames()
# 
# 
# 
# 
# 
# ## PLOT POTENTIAL BIVARIATE
# (g <- ggbivariate(df_a, outcome = "CHART_ACTION", explanatory = predictors) + 
#   scale_fill_paletteer_d("awtools::a_palette", direction = 1))
# 
# ggsave(plot = g, path="figs/modelling", filename =paste0("b00_engagement","_bivariatet.png"), units = c("in"), width = 15, height = 20  )
# 
# 
# ## PLOT EXPLANATIONS OF ENGAGEMENT
# 
# 
# 
# ##
# 
# ## Visualize Engagement with B00
# 
# # 
# # 
# # 
# # 
# # chart_m.1 <- lm(CHART_TRUST ~ CHART_BEAUTY, data = df )
# # summ(chart_m.1)
# # 
# # chart_m.2 <- lm(CHART_TRUST ~ MAKER_ALIGN , data = df )
# # summ(chart_m.2)
# # 
# # 
# # anova(chart_m.1, chart_m.2)
# # compare_performance(chart_m.1, chart_m.2, rank = TRUE)
# # 
# # 
# # m.1 <- lm(MAKER_TRUST ~ CHART_BEAUTY, data = df )
# # summ(m.1)
# # 
# # m.2 <- lm(MAKER_TRUST ~ MAKER_ALIGN , data = df )
# # summ(m.2)
# # 
# # anova(m.1, m.2)
# # compare_performance(chart_m.1, chart_m.2, m.1, m.2, rank = TRUE)
# # 
# # 
# # m.3 <- lm(CHART_TRUST ~ MAKER_TRUST + MAKER_ALIGN + CHART_BEAUTY , data = df )
# # summ(m.3)
# # 
# # m.4 <- lm(CHART_TRUST ~ MAKER_TRUST + MAKER_ALIGN + CHART_BEAUTY + flipped_intent, data = df )
# # summ(m.4)
# # 
# # m.5 <- lm(CHART_TRUST ~ MAKER_TRUST + MAKER_ALIGN + CHART_BEAUTY + flipped_intent + flipped_data, data = df )
# # summ(m.5)
# # 
# # 
# # m.6 <- lm(MAKER_TRUST ~ CHART_BEAUTY + MAKER_ALIGN + flipped_intent + flipped_data + flipped_design, data = df )
# # summ(m.6)
# #  
# # 
# # m.7 <- lm(CHART_TRUST ~ CHART_BEAUTY + MAKER_ALIGN + flipped_intent + flipped_data + flipped_design, data = df )
# # summ(m.7)
# # 
# # m <- lm(MAKER_TRUST ~ MAKER_ALIGN, data = df)
# # summ(m)
# # 
# #  
# #  compare_performance(m.1, m.2, m.3, m.4, m.5, m, rank = TRUE)
# # # 
# # ## WHAT PREDICTS MAKER_TRUST?
# # 
# # 
# # ## PLOT THE DATA 
# # ggplot(df, aes(x = MAKER_ALIGN, y = MAKER_TRUST)) + 
# #   geom_point() +
# #   stat_smooth(method = "lm", 
# #               formula = y ~ x, 
# #               geom = "smooth") + 
# #   labs(title = "MAKER ALIGNMENT PREDICTS TRUST?")+
# #   theme_minimal()
# # 
# # 
# # ## PLOT THE DATA 
# # ggplot(df, aes(x = flipped_data, y = MAKER_TRUST)) + 
# #   geom_point() +
# #   stat_smooth(method = "lm", 
# #               formula = y ~ x, 
# #               geom = "smooth") + 
# #   labs(title = "MAKER DATA PREDICTS TRUST?")+
# #   theme_minimal()
# # 
# # 
# # ## BUILD MODEL
# # m.1 <- lm( MAKER_TRUST ~ MAKER_ALIGN, data = df)
# # # summary(m.1)
# # jtools::summ(m.1, confint = TRUE)
# # # check_model(m.1)
# # # report(m.1)
# # 
# # ## BUILD MODEL
# # m.2 <- lm( MAKER_TRUST ~ MAKER_ALIGN + flipped_data, data = df)
# # # summary(m.2)
# # jtools::summ(m.2, confint = TRUE)
# # # check_model(m.2)
# # # report(m.2)
# # 
# # ## BUILD MODEL
# # m.3 <- lm( MAKER_TRUST ~ MAKER_ALIGN + flipped_data + flipped_design , data = df)
# # # summary(m.3)
# # jtools::summ(m.3, confint = TRUE)
# # # check_model(m.3)
# # # report(m.3)
# # 
# # 
# # ## BUILD MODEL
# # m.4 <- lm( MAKER_TRUST ~ MAKER_ALIGN + flipped_data + flipped_design + MAKER_POLITIC, data = df)
# # # summary(m.3)
# # jtools::summ(m.4, confint = TRUE)
# # # check_model(m.3)
# # # report(m.3)
# # 
# # 
# # 
# #   
# # compare_performance(m.1, m.2, m.3, m.4, m.B,  rank = TRUE)
# # 
# # 
# # # effect_plot(m.2, pred=MAKER_DATA, rug = TRUE, plot.points = TRUE) + 
# # #   xlim(0,100) + 
# # #   ylim(0,100)
# # 
# # 
# # 
# # 
# # df <- df_graphs %>% filter(STIMULUS != "B0-0") %>% 
# #   select(PID, STIMULUS, 
# #          MAKER_ID:MAKER_TRUST, CHART_LIKE:CHART_TRUST, 
# #          D_politicsFiscal, D_politicsSocial)
# # 
# # m.pid <- lmer( MAKER_TRUST ~  (1 | PID) , data = df, REML = FALSE)
# # m.stim <- lmer( MAKER_TRUST ~ (1|STIMULUS) , data = df, REML = FALSE)
# # m.r <- lmer( MAKER_TRUST ~ (1 | PID) + (1|STIMULUS), data = df, REML = FALSE)
# # 
# # compare_performance (m.pid, m.stim, m.r, rank = TRUE)
# # 
# # 
# # 
# # m.1 <- update( m.r, .~. + CHART_BEAUTY)
# # summ(m.1)
# # 
# # m.2 = update(m.1, . ~ . + CHART_LIKE)
# # summ(m.2)
# # 
# # 
# # m.3 = update(m.2, . ~ . + CHART_TRUST)
# # summ(m.3)
# # 
# # 
# # compare_performance(m.r, m.1, m.2, m.3, rank = TRUE)
df <- df_graphs_full %>% 
  filter(STIMULUS== "B0-0") 

 p <- ggplot(df, aes(x = MAKER_POLITIC, y = MAKER_CONF,
          color = MAKER_ID ,
          text = paste0("MAKER-DETAIL: ",MAKER_DETAIL, "<br>","ID: ", PID, "MAKER_EXPLAIN", MAKER_EXPLAIN)))+
      scale_color_viridis(discrete=TRUE, option="viridis")  +
      geom_point(size=0.5) +
      xlim(0,100)+
      ylim(0,100)+
      facet_grid(D_politicalParty ~ MAKER_ID) +
      theme_minimal() +
      labs(
        title = "B0-0 | MAKER Identification, Politics and Confidence",
        subtitle = "\n by Participant Confidence and Political Party",
        x = "MAKER POLITICS",
        y = "MAKER ID CONFIDENCE"
      )


ggplotly(p)

6.2.3 On Confidence

# df <- df_confidence_long %>% filter(QUESTION %nin% c("CONF", "SDIFF")) ##AVERAGE CONFIDENCE

df <- df_graphs_full %>% filter(STIMULUS== "B0-0")


ggplot(df, aes(x = MAKER_CONF, y = MAKER_TRUST, color = MAKER_ID)) + 
  geom_point() +
  scale_color_viridis(discrete=TRUE, option="viridis")  + 
  facet_grid(BLOCK ~ STIMULUS_CATEGORY) +
  theme_minimal() + 
  easy_remove_legend() 

ggplot(df, aes(x = MAKER_POLITIC, y = MAKER_TRUST, color = MAKER_ID)) + 
  geom_point() +
  scale_color_viridis(discrete=TRUE, option="viridis")  + 
  facet_grid(BLOCK ~ STIMULUS_CATEGORY) +
  theme_minimal() + 
  easy_remove_legend() 

7 EDA

7.1 PAIR PLOTS

7.1.1 B0-0 MILLENIAL PINK PLANTS

#FILTER DATAFRAME
df <- df_graphs_full %>%
  filter(STIMULUS== "B0-0") %>%
  select(
    Distribution,  D_politicalParty,
    # D_gender, D_race, D_education, D_income, D_age,
    # ENCOUNTER,
    MAKER_ID,
    D_politicsSocial, D_politicsFiscal,
    MAKER_DESIGN, MAKER_DATA, MAKER_CONF, MAKER_ALIGN, MAKER_TRUST, CHART_TRUST
    # :MAKER_TRUST,
    # TOOL_CONF,CHART_LIKE:CHART_TRUST
)

# g <- ggpairs(df, color=MAKER_ID)
# ggsave(plot = g, path="figs/pairplots", filename =paste0("B0-0","_pairplot.svg"), units = c("in"), width = 20, height = 20  )
# ggplotly(g)




(boo_pair <- ggpairs(df, aes(color= MAKER_ID)) +
    scale_color_viridis(discrete=TRUE, option="viridis")+
    scale_fill_viridis(discrete=TRUE, option="viridis") +
    theme_minimal()
  )

(boo_duo <- ggduo(df, aes(color= Distribution)) +
    scale_color_viridis(discrete=TRUE, option="viridis")+
    scale_fill_viridis(discrete=TRUE, option="viridis") +
    theme_minimal()
  )

ggplotly(boo_duo) #surprisingly works! but not helpful

7.1.2 CATEGORIES-MAKER

df <- df_graphs %>% select(
  STIMULUS_CATEGORY, MAKER_ID, MAKER_DESIGN, MAKER_DATA, MAKER_POLITIC, MAKER_ARGUE, MAKER_ALIGN, MAKER_TRUST, CHART_TRUST, CHART_BEAUTY
)


#maker pair plot 
x <- ggduo(df, aes(color = STIMULUS_CATEGORY))
ggsave(plot = x, path="figs/pairplots", filename =paste0("maker_category","_duoplot.png"), units = c("in"), width = 20, height = 20  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#maker pair plot 
x <- ggpairs(df, aes(color = STIMULUS_CATEGORY))
ggsave(plot = x, path="figs/pairplots", filename =paste0("maker_category","_pairplot.png"), units = c("in"), width = 20, height = 20  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

7.1.3 CATEGORIES-CHART

df <- df_graphs %>% select(
  STIMULUS_CATEGORY, MAKER_ID, CHART_LIKE, CHART_BEAUTY, CHART_INTENT, CHART_TRUST, MAKER_TRUST
)


#maker pair plot 
x <- ggduo(df, aes(color = STIMULUS_CATEGORY))
ggsave(plot = x, path="figs/pairplots", filename =paste0("chart_category","_duoplot.png"), units = c("in"), width = 20, height = 20  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#maker pair plot 
x <- ggpairs(df, aes(color = STIMULUS_CATEGORY))
ggsave(plot = x, path="figs/pairplots", filename =paste0("chart_category","_pairplot.png"), units = c("in"), width = 20, height = 20  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

7.1.4 ALL NUMERIC

df <- df_graphs %>% select(STIMULUS_CATEGORY, where(is.numeric)) %>% 
  select(-duration.min)


#maker pair plot 
x <- ggscatmat(df, color = "STIMULUS_CATEGORY")
## Warning in ggscatmat(df, color = "STIMULUS_CATEGORY"): Factor variables are
## omitted in plot
ggsave(plot = x, path="figs/pairplots", filename =paste0("category","_scaplot.png"), units = c("in"), width = 20, height = 20  )

8 INTERACTIVE GRAPHS

8.0.1 WIP IXN Maker ID & Politics

#SETUP LISTS 
stim <- ref_stimuli
plots_maker_politics <- htmltools::tagList()

### MAKE PLOTS 
# LOOP THROUGH STIMULI
i = 0
for (s in stim){
  i = i+1 #iterator hack
  
  d <- df_graphs_full %>% filter(STIMULUS == s) %>% select(PID, STIMULUS, 
                                                               MAKER_ID, MAKER_DETAIL, MAKER_CONF, MAKER_POLITIC,
                                                               D_politicalParty, D_politicsSocial, D_politicsFiscal) 
  p <- ggplot(d, aes(x = MAKER_POLITIC, y = MAKER_CONF,
          color = MAKER_ID ,
          text = paste0("MAKER-DETAIL: ",MAKER_DETAIL, "<br>","ID: ", PID)))+
      geom_point() +
      xlim(0,100)+
      ylim(0,100)+
      facet_grid(MAKER_ID  ~ D_politicalParty) +
      theme_minimal() +
      labs(
        title = paste(s," | ", "MAKER Identification, Politics and Confidence, by Participant Confidence and Political Affiliation"),
        x = "MAKER POLITICS",
        y = "MAKER CONFIDENCE"
      )

  
  plots_maker_politics[[i]] <- ggplotly(p)
  
}

names(plots_maker_politics) <- ref_stimuli


#works in console but not render 
### PRINT PLOTS
# for (s in stim){
#   print(plots_maker_politics[[s]])
# }


# ggplotly(p) #works for single

plots_maker_politics

9 WIP QDA ANALYSIS

9.1 MAKER ID DETAIL

# df <- df_graphs_coded %>% select( ID.Prolific, STIMULUS, BLOCK, STIMULUS_CATEGORY, MAKER_ID, MAKER_DETAIL, CODE_M_ID_SPECIFIC) %>% 
#   separate_longer_delim(
#     cols = CODE_M_ID_SPECIFIC,
#     delim = ","
#   ) %>% filter(
#     CODE_M_ID_SPECIFIC %nin% c("x", "X","example")
#   ) %>% mutate(
#     CODED_MAKER = str_trim(CODE_M_ID_SPECIFIC, side="left"),
#     CODED_MAKER = str_to_upper(CODED_MAKER),
#     CODED_MAKER = factor(CODED_MAKER, levels = c( "NYT",
#                                                   "WASHINGTON POST" ,
#                                                   "USA TODAY" ,
#                                                   "THE ECONOMIST" ,
#                                                   "WSJ",
#                                                   "POPULAR SCIENCE" ,
#                                                   "TIME",
#                                                   "NPR",
#                                                   "PBS",
#                                                   "ASSOCIATED PRESS"  ,
#                                                   "BBC",
#                                                   "CBS NEWS"  ,
#                                                   "ABC NEWS"  ,
#                                                   "NBC",
#                                                   "CNN",
#                                                   "FOX NEWS"  ,
#                                                   "BUZZFEED",
#                                                   "VOX",
#                                                   "VICE",
#                                                   "HUFFINGTON POST" ,
#                                                   "US DOD"  ,
#                                                   "US BUREAU OF ECONOMIC ANALYSIS",
#                                                   "US DOE"  ,
#                                                   "US CDC"  ,
#                                                   "MINISTRY OF HEALTH"  ,
#                                                   "EPA",
#                                                   "NOAA",
#                                                   "NATIONAL WEATHER SERVICE"  ,
#                                                   "WHO",
#                                                   "UN",
#                                                   "THE NATURE CONSERVANCY"  ,
#                                                   "GREENPEACE",
#                                                   "LEAF",
#                                                   "HARVARD",
#                                                   "COLUMBIA UNIVERSITY" ,
#                                                   "IAA",
#                                                   "IBM",
#                                                   "MICROSOFT",
#                                                   "GOLDMAN SACHS" ,
#                                                   "GMC",
#                                                   "TESLA",
#                                                   "GREEN PARTY OF AMERICA"  ,
#                                                   "THE DEMOCRATIC PARTY"  ,
#                                                   "THE REPUBLICAN PARTY")),
#     STIM = factor(STIMULUS, levels = c(
#       "B1-1",  "B2-1", "B3-1", "B4-1", "B5-1", "B6-1",
#       "B1-2",  "B2-2", "B3-2", "B4-2", "B5-2", "B6-2",
#       "B1-3", "B2-3", "B3-3", "B4-3", "B5-3", "B6-3",
#       "B1-4","B2-4" ,"B3-4" ,"B4-4" ,"B5-4" ,"B6-4", "B0-0"
#     ))
#   ) 
# 
# 
# 
# ggplot(df,  aes(x = MAKER_ID, fill = CODED_MAKER )) + 
#   geom_bar(position = "stack") +
#   facet_grid( rows = vars(BLOCK), cols = vars(fct_rev(STIMULUS_CATEGORY))) + 
#   theme_minimal() + 
#   labs(title = "Specific makers identified by stimulus")
# 
#                           
# ggplot(df,  aes(x = STIMULUS, fill = CODED_MAKER )) + 
#   geom_bar(position = "stack") + 
#   theme_minimal()
# 
# 
# ggplot(df,  aes(x = STIMULUS, fill = CODED_MAKER )) + 
#   geom_bar(position = "stack") + 
#   facet_wrap(~CODED_MAKER) + 
#   theme_minimal() +
#   easy_remove_x_axis() +
#   easy_remove_legend()
# 
#   
# ggplot(df,  aes(x = CODE_M_ID_SPECIFIC, fill = STIM, )) + 
#   geom_bar(position = "stack") + 
#   coord_flip() + 
#   theme_minimal() 
#   

10 MODELLING

##STATSPLOT
# grouped_ggwithinstats(
#   data  = df_graphs,
#   x     = STIMULUS,
#   y     = CHART_TRUST,
#   grouping.var = ID.Study
# ) 

# grouped_ggscatterstats(
#   data = df_graphs, ## data frame from which variables are taken
#   x = CHART_BEAUTY, ## predictor/independent variable
#   y = CHART_TRUST, ## dependent variable
#   grouping.var = ID.Study,
#   xlab = "CHART BEAUTY", ## label for the x-axis
#   ylab = "CHART TRUST", ## label for the y-axis
#   # label.expression = rating < 5 & budget > 100, ## expression for deciding which points to label
#   point.label.args = list(alpha = 0.7, size = 4, color = "grey50"),
#   xfill = "#CC79A7", ## fill for marginals on the x-axis
#   yfill = "#009E73" ## fill for marginals on the y-axis
#   # title = "CHART TRUST (VS) MAKER TRUST",
#   # caption = ""
# )
# 
# gf_point( data = df_graphs, CHART_TRUST~MAKER_TRUST, color = ~STIMULUS) %>% gf_facet_wrap(~STIMULUS)
# 
# 
# grouped_ggscatterstats(
#   data = df_graphs, ## data frame from which variables are taken
#   x = MAKER_TRUST, ## predictor/independent variable
#   y = CHART_TRUST, ## dependent variable
#   grouping.var = ID.Study,
#   xlab = "MAKER TRUST", ## label for the x-axis
#   ylab = "CHART TRUST", ## label for the y-axis
#   # label.expression = rating < 5 & budget > 100, ## expression for deciding which points to label
#   point.label.args = list(alpha = 0.7, size = 4, color = "grey50"),
#   xfill = "#CC79A7", ## fill for marginals on the x-axis
#   yfill = "#009E73" ## fill for marginals on the y-axis
#   # title = "CHART TRUST (VS) MAKER TRUST",
#   # caption = ""
# )
# 
# gf_point( data = df_graphs, MAKER_TRUST~CHART_TRUST, color = ~STIMULUS) %>% gf_facet_wrap(~STIMULUS)
# 
# 
# 
# grouped_ggscatterstats(
#   data = df_graphs, ## data frame from which variables are taken
#   x = CHART_BEAUTY, ## predictor/independent variable
#   y = MAKER_ALIGN, ## dependent variable
#   grouping.var = ID.Study,
#   xlab = "CHART BEAUTY", ## label for the x-axis
#   ylab = "MAKER ALIGN", ## label for the y-axis
#   # label.expression = rating < 5 & budget > 100, ## expression for deciding which points to label
#   point.label.args = list(alpha = 0.7, size = 4, color = "grey50"),
#   xfill = "#CC79A7", ## fill for marginals on the x-axis
#   yfill = "#009E73" ## fill for marginals on the y-axis
#   # title = "CHART TRUST (VS) MAKER TRUST",
#   # caption = ""
# )
# 
# 
# gf_point( data = df_graphs, MAKER_ALIGN~CHART_BEAUTY, color = ~STIMULUS) %>% gf_facet_wrap(~STIMULUS)
# 
# 
# 
# 
# 
# 
# ggscatterstats(
#   data = df_graphs %>% filter(STIMULUS=="B0-0"), ## data frame from which variables are taken
#   x = CHART_BEAUTY, ## predictor/independent variable
#   y = MAKER_TRUST, ## dependent variable
#   xlab = "CHART BEAUTY", ## label for the x-axis
#   ylab = "MAKER TRUST", ## label for the y-axis
#   # label.expression = rating < 5 & budget > 100, ## expression for deciding which points to label
#   point.label.args = list(alpha = 0.7, size = 4, color = "grey50"),
#   xfill = "#CC79A7", ## fill for marginals on the x-axis
#   yfill = "#009E73" ## fill for marginals on the y-axis
#   # title = "CHART TRUST (VS) MAKER TRUST",
#   # caption = ""
# )
# 
# gf_point( data = df_graphs %>% filter(STIMULUS=="B0-0"), CHART_TRUST~CHART_BEAUTY, color = ~STIMULUS) %>% gf_facet_wrap(~STIMULUS)
# 
# gf_point( data = df_graphs %>% filter(STIMULUS=="B0-0"), MAKER_TRUST~CHART_BEAUTY, color = ~STIMULUS) %>% gf_facet_wrap(~STIMULUS)
# 
# 
# #TODO CENTERING AND NORMALIZING
# 
# ### little model
# df <- df_graphs %>% filter(STIMULUS == "B0-0")
# m <- lm( CHART_TRUST ~ CHART_BEAUTY,data = df)
# summary(m)
# ggnostic(m) #GGALLY MODEL CHECKS
# check_model(m) #EASY STATS MODEL CHECKS 
# 
# report(m)
### MORE COMPLEX MODEL
# df <- df_graphs %>% filter(STIMULUS != "B0-0")
# m1 <- lmer(CHART_TRUST ~ CHART_BEAUTY + (1|ID.Qualtrics), data = df)
# m2 <- lmer(CHART_TRUST ~ CHART_BEAUTY + (1|STIMULUS), data = df)
# m3 <- lmer(CHART_TRUST ~ CHART_BEAUTY + (1 | STIMULUS) + (1 | ID.Qualtrics), data = df)
# summary(m1)
# summary(m2)
# summary(m3)
# compare_parameters(m1,m2,m3)
# compare_performance(m1,m2,m3, rank = TRUE)
# 
# 
# m <- lmer(CHART_TRUST ~ CHART_BEAUTY + MAKER_TRUST + (1 | STIMULUS) ,  data = df)
# mi <- lmer(CHART_TRUST ~ CHART_BEAUTY * MAKER_TRUST + (1 | STIMULUS) , data = df)
# summary(m)
# summary(mi)
# 
# compare_performance(m3,m,mi, rank = TRUE)
# 
# report(m)
#todo see https://yury-zablotski.netlify.app/post/mixed-effects-models-2/ about fitting with ML vs REML for model comparison 
# #TODO WALK THROUGH THIS 
# #https://yury-zablotski.netlify.app/post/mixed-models/#multiple-random-slope-model
# 
# #load example data
# data("sleepstudy")
# 
# #fit the model
# m_slp <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
# 
# #the next line put all the estimated intercept and slope per subject into a dataframe
# reaction_slp <- as.data.frame(t(apply(ranef(m_slp)$Subject, 1,function(x) fixef(m_slp) + x)))
# 
# #to get the predicted regression lines we need one further step, writing the linear equation: Intercept + Slope*Days with different coefficient for each subject
# pred_slp <- melt(apply(reaction_slp,1,function(x) x[1] + x[2]*0:9), value.name = "Reaction")
# 
# #some re-formatting for the plot
# names(pred_slp)[1:2] <- c("Days","Subject")
# pred_slp$Days <- pred_slp$Days - 1
# pred_slp$Subject <- as.factor(pred_slp$Subject)
# 
# #plot with actual data 
# ggplot(pred_slp,aes(x=Days,y=Reaction,color=Subject))+
#   geom_line()+
#   geom_point(data=sleepstudy,aes(x=Days,y=Reaction))+
#   facet_wrap(~Subject,nrow=3)

11 GENERATE SEMANTIC DIFFERENTIAL GRAPHS

NOTE: the following blocks run if the graph_render var is set to TRUE in the header block. Graphs are generated (takes a long time) and written to the figs folder. Only do this data are are added or excluded

11.1 ALL QUESTIONS

11.1.1 MASTER — ALL STIMULI

##ONLY RENDER GRAPHS IF SET TO TRUE
if(graph_render == TRUE){


  
  plot_sd = function (data, column, type, facet, facet_by, boxplot) {

  ggplot(df, aes(y = .data[[column]], x="")) +
    {if(boxplot) geom_boxplot(width = 0.5) } +
    geom_jitter(width = 0.1, alpha=0.3, {if(facet) aes(color=.data[[facet_by]])}) +
    {if(facet) facet_grid(.data[[facet_by]] ~ .)} +
    scale_y_continuous(limits=c(-1,101)) +
    labs(x="", y="") +
    coord_flip()  +
    {if(type == "S")
      guides(
        y = guide_axis_manual(labels = ref_labels[column,"left"]),
        y.sec = guide_axis_manual(labels = ref_labels[column,"right"])
      )} +
    {if(type == "Q")
      guides(
        y = guide_axis_manual(labels = ref_labels[q,"left"]),
        y.sec = guide_axis_manual(labels = ref_labels[q,"right"])
      )} +
  theme_minimal()  +
     labs (
       caption = column
     ) + easy_remove_legend()
  }
  
#################### ALL QUESTIONS across ALL STIMULUS #####################

  df <- df_graphs 

  #subset data cols 
  cols <- df %>% select( all_of(ref_sd_questions))
  plots <- as.list(lapply(colnames(cols), plot_sd, data = df, type ="S", facet = FALSE, boxplot=TRUE))
  
  #aggregate q plots into one for stimulus 
  plot_master_questions <- plots[[1]] / plots[[2]] / plots[[3]] / plots[[4]] / plots[[5]] / plots[[6]] / plots[[7]] /
   plots[[8]] /plots[[9]] /plots[[10]] /plots[[11]] + 
   plot_annotation(
     title = "ALL STIMULI",
     subtitle =""
   )
  
  ggsave(plot = plot_master_questions, path="figs/_master", filename =paste0("combined_stimuli","_box.png"), units = c("in"), width = 10, height = 14  )
  
  
  #### RAINCLOUD PLOT#############################################################################
  # setup dataframe 
  df <- df_sd_questions_long %>% select(1:8, QUESTION, value) 

  #RAINCLOUD PLOT
  x <- ggplot(df, aes(y = fct_rev(QUESTION), x = value, fill = fct_rev(QUESTION))) +
  stat_slab(aes(thickness = after_stat(pdf*n)), scale = 0.7) +
  stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA) +
  guides(
    # y = guide_axis_manual(labels = paste(levels(df$QUESTION),ref_labels$left)),
    y = guide_axis_manual(labels = rev(ref_labels$left)),
    y.sec = guide_axis_manual(labels = rev(ref_labels$right))
  ) + 
  labs (title = "ALL STIMULI") +
  theme_minimal() 
   
  
  ggsave(plot = x, path="figs/_master", filename =paste0("combined_stimuli","_rain.png"), units = c("in"), width = 10, height = 14  )


  
  
  #### DENSITY RIDGES#############################################################################
  # setup dataframe 
  
  magic_colors = list(
  amy_gradient =  c("#ac57aa", "#9e5fa4", "#90689f", "#827099", "#747894", "#66818e", "#578988", "#499183", "#3b997d", "#2da278", "#1faa72"),
  my_favourite_colours = c("#702963", "#637029",    "#296370")
)
  
  
  magic_palettes = function(name, n, all_palettes = magic_colors, type = c("discrete", "continuous")) {
  palette = all_palettes[[name]]
  if (missing(n)) {
    n = length(palette)
  }
  type = match.arg(type)
  out = switch(type,
               continuous = grDevices::colorRampPalette(palette)(n),
               discrete = palette[1:n]
  )
  structure(out, name = name, class = "palette")
}
  

  
  df <- df_sd_questions_long %>% select(1:8, QUESTION, value) 
  
  
 (x <-  ggplot(df, aes(x = value, y = fct_rev(QUESTION), fill = fct_rev(QUESTION))) +
    geom_density_ridges(scale = 1.5,quantile_lines = TRUE, alpha = 0.75) + 
    # scale_fill_paletteer_d("colorBlindness::SteppedSequential5Steps")+ #don't hate this
    # scale_fill_paletteer_d("dichromat::SteppedSequential_5", direction = 1)+  #don't hate this
    # scale_fill_paletteer_d("ggthemes::Hue_Circle", direction = 1)+  #LOVE THIS
    # scale_fill_paletteer_d("ggthemes::Classic_Cyclic", direction = 1)+  #LOVE THIS
    ggplot2::scale_fill_manual(values = magic_palettes("amy_gradient"))+
    theme_minimal() +
    guides(
    # y = guide_axis_manual(labels = paste(levels(df$QUESTION),ref_labels$left)),
    # y = guide_axis_manual(labels = paste(rev(levels(df$QUESTION)))),
    y = guide_axis_manual(labels = rev(ref_labels$left)),
    y.sec = guide_axis_manual(labels = rev(ref_labels$right))
  ) + labs(
    title = "ALL STIMULI — SD questions"
  ))
  
  ggsave(plot = x, path="figs/_master", filename =paste0("combined_stimuli","_ridges.png"), units = c("in"), width = 10, height = 14  )

  
  
}

11.1.2 ALL Q BY STIMULUS

##ONLY RENDER GRAPHS IF SET TO TRUE
if(graph_render == TRUE){


#################### ALL QUESTIONS AT STIMULUS #####################
# ONE PLOT FOR EACH STIMULUS WITH ALL QUESTIONS

#set stimuli to be graphed 
stimuli <- ref_stimuli #created in wrangling block
# stimili <- c("B0-0","B2-1" ,"B2-2", "B2-3", "B2-4")

box_stimuli <- list()
rain_stimuli <- list()
i = 0
for (s in stimuli){
  i = i+1
  
  # setup dataframe 
  title <- df_stimuli %>% filter(ID == s) %>% select(NAME) 
  title <- paste(s,"|",title)
  df <- df_graphs %>% filter(STIMULUS== s)

  #subset data cols 
  cols <- df %>% select( all_of(ref_sd_questions))
  plots <- as.list(lapply(colnames(cols), plot_sd, data = df, type ="S", facet = FALSE, boxplot=TRUE))

  #aggregate q plots into one for stimulus 
  x <- plots[[1]] / plots[[2]] / plots[[3]] / plots[[4]] / plots[[5]] / plots[[6]] / plots[[7]] /
   plots[[8]] /plots[[9]] /plots[[10]] /plots[[11]] + 
   plot_annotation(
     title = title,
     subtitle =""
   )
  
  box_stimuli[[i]] <- x
  ggsave(plot = x, path="figs/all_q_by_stimulus", filename =paste0(s,"_box.png"), units = c("in"), width = 10, height = 14  )
  
  
  #### RAINCLOUD PLOT
  # setup dataframe 
  title <- df_stimuli %>% filter(ID == s) %>% select(NAME) 
  title <- paste(s,"|",title)
  df <- df_sd_questions_wide %>% select(1:6, QUESTION, all_of(s)) %>% filter(!is.na(s)) %>%  mutate(value = get(s))


  #RAINCLOUD PLOT
  x <- ggplot(df, aes(y = fct_rev(QUESTION), x = value, fill = fct_rev(QUESTION))) +
  stat_slab(aes(thickness = after_stat(pdf*n)), scale = 0.7) +
  stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA) +
  guides(
    # y = guide_axis_manual(labels = paste(levels(df$QUESTION),ref_labels$left)),
    y = guide_axis_manual(labels = rev(ref_labels$left)),
    y.sec = guide_axis_manual(labels = rev(ref_labels$right))
  ) + 
  labs (title = title) +
  theme_minimal() 
   
  
  rain_stimuli[[i]] <- x
  ggsave(plot = x, path="figs/all_q_by_stimulus", filename =paste0(s,"_rain.png"), units = c("in"), width = 10, height = 14  )
  
  
  
}

#GIVE NAMES TO LIST
#NOW CAN ACCESS plots by plots_stimulus$`B1-1`
#ALSO plots_stimulus$`B1-1`[[1]]
names(box_stimuli) <- stimuli
names(rain_stimuli) <- stimuli
rm(x, i, plots)

#############################################################################
} 

11.1.3 ALL Q BY CATEGORY

##ONLY RENDER GRAPHS IF SET TO TRUE
if(graph_render == TRUE){

#################### ALL QUESTIONS AT CATEGORY #####################
# ONE PLOT FOR EACH CATEGORY WITH ALL QUESTIONS

#set questions to be graphed 
categories <- unique(df_graphs$STIMULUS_CATEGORY)#created in wrangling block
box_category <- list()
rain_category <- list()

i = 0
for (c in categories){
  i = i+1

  # setup dataframe
  title <- paste("STIMULUS CATEGORY ", c)
  df <- df_graphs %>% filter(STIMULUS_CATEGORY == c)
  
  #subset data cols
  cols <- df %>% select( all_of(ref_sd_questions))
  plots <- as.list(lapply(colnames(cols), plot_sd, data = df, type = "S", facet = FALSE, boxplot=TRUE))
  
  #aggregate stimulus plots into block for question
  x <- plots[[1]] / plots[[2]]  / plots[[3]] / plots[[4]] / plots[[5]] / plots[[6]] / plots[[7]] /
        plots[[8]] /plots[[9]] /plots[[10]] /plots[[11]] +
      plot_annotation(
        title = title,
       subtitle =""
      )

  box_category[[i]] <- x
  ggsave(plot = x, path="figs/all_q_by_category", filename =paste0(c,"_box.png"), units = c("in"), width = 10, height = 26)
  
  
  
  
  #### RAINCLOUD PLOT
  # setup dataframe 
  title <- paste("STIMULUS CATEGORY ", c)
  df <- df_sd_questions_long %>% filter(STIMULUS_CATEGORY == c) %>% select(STIMULUS_CATEGORY, QUESTION, value, PID, Assigned.Block) 
  
  #RAINCLOUD PLOT
  x <- ggplot(df, aes(y = fct_rev(QUESTION), x = value, fill = fct_rev(QUESTION))) +
  stat_slab(normalize="groups", trim = TRUE, scale = 0.7) +
  stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA) +
  guides(
    y = guide_axis_manual(labels = paste(rev(levels(df$QUESTION)),rev(ref_labels$left))),
    y = guide_axis_manual(labels = rev(ref_labels$left)),
    y.sec = guide_axis_manual(labels = rev(ref_labels$right))
  ) +
  labs (title = title) +
  theme_minimal() 
   
  
  rain_category[[i]] <- x
  ggsave(plot = x, path="figs/all_q_by_category", filename =paste0(c,"_rain.png"), units = c("in"), width = 10, height = 26  )
  
  
}  

#GIVE NAMES TO LIST
#NOW CAN ACCESS plots by plots_category$A
#ALSO plots_category$A[[1]]
names(box_category) <- categories
names(rain_category) <- categories
rm(x, i, plots)

}

11.2 BY QUESTION

11.2.1 Q for ALL STIMULI

##ONLY RENDER GRAPHS IF SET TO TRUE
if(graph_render == TRUE){

####################CREATE SD PLOTS FOR EACH QUESTION#####################

#set questions to be graphed 
questions <- ref_sd_questions  #created in wrangling block
box_s_question <- list()
rain_s_question <- list()
i = 0
for (q in questions){
  i = i+1
  
  # setup dataframe 
  title <- paste(q)
  df <- df_sd_questions_wide %>% filter(QUESTION == q)

  #subset data cols 
  c <- df %>% select( all_of(ref_stimuli))
  plots <- as.list(lapply(colnames(c), plot_sd, data = df, type = "Q", facet = FALSE, boxplot=TRUE))
  
  #aggregate stimulus plots into block for question
  x <- plots[[1]] / plots[[2]]  / plots[[3]] / plots[[4]] / plots[[5]] / plots[[6]] / plots[[7]] /
        plots[[8]] /plots[[9]] /plots[[10]] /plots[[11]] /  plots[[12]] / plots[[13]] / plots[[14]] / 
        plots[[15]] /plots[[16]] /plots[[17]] /plots[[18]] /  plots[[19]] / plots[[20]] / plots[[21]] /
        plots[[22]] /plots[[23]] /plots[[24]] /plots[[25]] + 
      plot_annotation(
        title = title,
       subtitle =""
      )
  box_s_question[[i]] <- x 
  ggsave(plot = x, path="figs/by_q_for_all_stimuli", filename =paste0(q,"_box.png"), units = c("in"), width = 10, height = 26)

  
  
  
  ############# RAINCLOUD
  # setup dataframe 
  title <- paste(q)
  df <- df_sd_questions_long %>% filter(QUESTION ==q)
    #select(1:6, QUESTION, all_of(s)) %>% filter(!is.na(s)) %>%  mutate(value = get(s))
  left <- ref_labels[q,]$left
  right <- ref_labels[q,]$right

  #RAINCLOUD PLOT
  x <- ggplot(df, aes(y = fct_rev(STIMULUS) , x = value, fill = fct_rev(STIMULUS))) +
      stat_slab(normalize="groups", scale = 0.7) +
      stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA) +
  guides(
    y = guide_axis_manual(labels = left),
    y.sec = guide_axis_manual(labels = right)
  ) +
  labs (title = title) +
  theme_minimal() 
   
  
  rain_s_question[[i]] <- x
  ggsave(plot = x, path="figs/by_q_for_all_stimuli", filename =paste0(q,"_rain.png"), units = c("in"), width = 10, height = 14  )
  

  
}

#GIVE NAMES TO LIST
#NOW CAN ACCESS plots by plots_s_question$MAKER_DESIGN
#ALSO plots_s_question$MAKER_DESIGN[[1]]
names(box_s_question) <- questions
names(rain_s_question) <- questions
rm(x, i, plots, left, right)

#############################################################################

}

11.2.2 Q for ALL CATEGORIES

##ONLY RENDER GRAPHS IF SET TO TRUE
if(graph_render == TRUE){
  
####################SD PLOTS QUESTION BY CATEGORY #####################

#set grouping column 
g = "STIMULUS_CATEGORY" #string name of column to group by 

#set questions to be graphed 
questions <- ref_sd_questions#created in wrangling block
box_c_question <- list()
rain_c_question <- list()
i = 0
for (q in questions){
  i = i+1
  left <-   ref_labels[q,]$left
  right <-  ref_labels[q,]$right
  
  # setup dataframe 
  title <- paste(q," BY CATEGORY")
  df <- df_graphs 
  
  x <- multi_sd(df, left, right, x = q, y = g, color = g) + 
      labs(title  = q) 
 
  box_c_question[[i]] <- x
  ggsave(plot = x, path="figs/by_q_for_all_categories", filename =paste0(q,"_box.png"))
  
  
  
  ############RAINCLOUDS
  # setup dataframe 
  title <- paste(q," BY CATEGORY")
  df <- df_sd_questions_long %>% filter(QUESTION ==q)
  left <-   ref_labels[q,]$left
  right <-  ref_labels[q,]$right

  #RAINCLOUD PLOT
  x <- ggplot(df, aes(y = STIMULUS_CATEGORY , x = value, fill = STIMULUS_CATEGORY)) +
      stat_slab(normalize="groups", scale = 0.7) +
      stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA) +
  guides(
    y = guide_axis_manual(labels = left),
    y.sec = guide_axis_manual(labels = right)
  ) +
  labs (title = title) +
  theme_minimal() 
   
  
  rain_c_question[[i]] <- x
  ggsave(plot = x, path="figs/by_q_for_all_categories", filename =paste0(q,"_rain.png"), units = c("in"), width = 10, height = 14  )

  
}

#GIVE NAMES TO LIST
#NOW CAN ACCESS plots by plots_c_question$MAKER_DESIGN
#ALSO plots_c_question$MAKER_DESIGN[[1]]
names(box_c_question) <- questions
names(rain_c_question) <- questions
rm(x, i, right, left, df)
  
#############################################################################

}

12 STASH

wip code stash

12.0.1 COLOR palettes

#DISPLAY COLOR PALETTE
#display.brewer.pal(n = 8, name = 'Dark2')

#DISPLAY COLOR PALETTE
# paletteer_d("Redmonder::dPBIPuGn")

12.0.2 DISTRIBUTION plots

# #SET UP DATAFRAME
# df <- df_data
# 
# ## DENSITY HISTOGRAPH
# ggplot(data = df_data, aes( x = duration.min, fill = Assigned.Block)) +
#   geom_density(alpha = 0.5) +
#   facet_grid(rows = vars(Assigned.Block)) +
#   labs( x = "Survey Response Time (mins)",
#         title = "TOTAL Response Time by Sample",
#         subtitle = "(expect similiar across samples)") +
#   easy_add_legend_title("Sample") +
#   theme_minimal()
# 
# 
# ## RAINCLOUD
# ggplot(df, aes(x = duration.min, y = fct_rev(Assigned.Block), fill = fct_rev(Assigned.Block))) +
#   stat_slab(aes(thickness = after_stat(pdf*n), alpha=0.5), scale = 0.7) +
#   stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA) +
#   scale_fill_discrete(direction=-1)+
#   xlim(0,225) +
#   labs( x = "Survey Response Time (mins)", y="",
#         title = "TOTAL Response Time by Sample",
#         subtitle = "(expect similiar across samples)") +
#   theme_minimal() +
#   easy_remove_legend()
# 
# 
# ##RIDGEPLOT
# ggplot(df, aes(x = duration.min, y = fct_rev(Assigned.Block), fill = fct_rev(Assigned.Block))) +
#   geom_density_ridges(scale=0.9) +
#   # geom_boxplot()+
#   stat_pointinterval()+
#   theme_ridges() +
#   scale_fill_discrete(direction=-1)+
#   theme(legend.position = "none") +
#     labs( x = "Survey Response Time (mins)", y="",
#         title = "TOTAL Response Time by Sample",
#         subtitle = "(expect similiar across samples)")

12.0.3 all-q for stimulus

# s = "B0-0" #stimulus code
# t = "STIMULUS B0 — MILLENIAL PINK PLANTS" #STIMULUS TITLE
# 
# #FILTER DATAFRAME
# df <- df_graphs %>% filter(STIMULUS == s)
# 
# 
# #BUILD SEMANTIC DIFFERENTIALS
# m_design <- single_sd(df, "professional", "layperson", x = MAKER_DESIGN) +
#       labs(title  = "MAKER-DESIGN") +
#       easy_remove_legend()
# 
# m_data <- single_sd(df, "professional", "layperson", x = MAKER_DATA) +
#       labs(title  = "MAKER-DATA") +
#       easy_remove_legend()
# 
# m_politics <- single_sd(df, "left-leaning", "right-leaning", x = MAKER_POLITIC) +
#       labs(title  = "MAKER-POLITICS") +
#       easy_remove_legend()
# 
# m_argue <- single_sd(df, "confrontational", "diplomatic", x = MAKER_ARGUE) +
#       labs(title  = "MAKER-ARGUE") +
#       easy_remove_legend()
# 
# m_selfish <- single_sd(df, "altruistic", "selfish", x = MAKER_SELF) +
#       labs(title  = "MAKER-SELFISH") +
#       easy_remove_legend()
# 
# m_align <- single_sd(df, "does NOT share", "does share", x = MAKER_ALIGN) +
#       labs(title  = "MAKER-ALIGNMENT") +
#       easy_remove_legend()
# 
# m_trust <- single_sd(df, "untrustworthy", "trustworthy", x = MAKER_TRUST) +
#       labs(title  = "MAKER-TRUST") +
#       easy_remove_legend()
# 
# 
# #BUILD MASTER PLOT
# PLOT_maker <- (m_design / m_data / m_politics / m_argue / m_selfish / m_align / m_trust) +
#   plot_annotation(
#     title = t,
#     subtitle =""
#   )
# 
# PLOT_maker

12.0.4 all-q for stimulus - grouped

#  s = "B0-0" #stimulus code
#  t = "STIMULUS B0 — MILLENIAL PINK PLANTS" #STIMULUS TITLE
#  g = "Assigned.Block"
#  ##can also use y and color to split by additional variable 
#  
#  #FILTER DATAFRAME
#  df <- df_graphs %>% filter(STIMULUS == s)
# 
#  #BUILD SEMANTIC DIFFERENTIALS
#  m_design <- multi_sd(df, "professional", "layperson", x = "MAKER_DESIGN", y = g, color = g) +
#        labs(title  = "MAKER-DESIGN") +
#        easy_remove_legend()
# 
#  m_data <- multi_sd(df, "professional", "layperson", x = "MAKER_DATA", y = g, color = g) +
#        labs(title  = "MAKER-DATA") +
#        easy_remove_legend()
# 
#  m_politics <- multi_sd(df, "left-leaning", "right-leaning", x = "MAKER_POLITIC", y = g, color = g) +
#        labs(title  = "MAKER-POLITICS") +
#        easy_remove_legend()
# 
#  m_argue <- multi_sd(df, "confrontational", "diplomatic", x = "MAKER_ARGUE", y = g, color = g) +
#        labs(title  = "MAKER-ARGUE") +
#        easy_remove_legend()
# 
#  m_selfish <- multi_sd(df, "altruistic", "selfish", x = "MAKER_SELF", y = g, color = g) +
#        labs(title  = "MAKER-SELFISH") +
#        easy_remove_legend()
# 
#  m_align <- multi_sd(df, "does NOT share", "does share", x = "MAKER_ALIGN", y = g, color = g) +
#        labs(title  = "MAKER-ALIGNMENT") +
#        easy_remove_legend()
# 
#  m_trust <- multi_sd(df, "untrustworthy", "trustworthy", x = "MAKER_TRUST", y = g, color = g) +
#        labs(title  = "MAKER-TRUST") 
# 
# 
#  #BUILD MASTER PLOT
#  PLOT_maker <- (m_design / m_data / m_politics / m_argue / m_selfish / m_align / m_trust) +
#    plot_annotation(
#      title = t,
#      subtitle =""
#    )
# 
#  PLOT_maker

12.0.5 across Blocks

######### MAKER ATTRIBUTES ########################
# vals = c("datacollar", "bluecollar")
# leftside <- rep ("PROFESSIONAL",n_blocks )
# rightside <-rep ("LAYPERSON",n_blocks ) 
# b00_m_design <- ggplot(df, aes(y = MAKER_DESIGN, x = ID.Study, color = ID.Study))+
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   labs(x="", y="") +
#   coord_flip() + 
#   theme_minimal() + 
#   easy_remove_legend() + 
#   guides(
#   y = guide_axis_manual(
#   #breaks = vals, 
#   labels = leftside  
#   ),
#   y.sec = guide_axis_manual(
#   #breaks = vals, 
#   labels = rightside
# )) + 
#   labs(title  = "MAKER-DESIGN")

# b00_m_data <- ggplot(df, aes(y = MAKER_DATA, x = ID.Study, color = ID.Study))+
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   labs(x="", y="") +
#   coord_flip() + 
#   theme_minimal() + 
#   easy_remove_legend() + 
#   guides(
#   y = guide_axis_manual(
#   #breaks = vals, 
#   labels = leftside  
#   ),
#   y.sec = guide_axis_manual(
#   #breaks = vals, 
#   labels = rightside
# )) + 
#   labs(title  = "MAKER-DATA")


# leftside <- rep ("LEFT-WING",n_blocks )
# rightside <-rep ("RIGHT-WING",n_blocks ) 
# b00_m_politics <- ggplot(df, aes(y = MAKER_POLITIC, x = ID.Study, color = ID.Study))+
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   labs(x="", y="") +
#   coord_flip() + 
#   theme_minimal() + 
#   easy_remove_legend() + 
#   guides(
#   y = guide_axis_manual(
#   #breaks = vals, 
#   labels = leftside  
#   ),
#   y.sec = guide_axis_manual(
#   #breaks = vals, 
#   labels = rightside
# )) + 
#   labs(title  = "MAKER-POLITICS")
  

# leftside <- rep ("CONFRONTATIONAL",n_blocks )
# rightside <-rep ("DIPLOMATIC",n_blocks ) 
# b00_m_argue <- ggplot(df, aes(y = MAKER_ARGUE, x = ID.Study, color = ID.Study))+
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   labs(x="", y="") +
#   coord_flip() + 
#   theme_minimal() + 
#   easy_remove_legend() + 
#   guides(
#   y = guide_axis_manual(
#   #breaks = vals, 
#   labels = leftside  
#   ),
#   y.sec = guide_axis_manual(
#   #breaks = vals, 
#   labels = rightside
# )) + 
#   labs(title  = "MAKER-ARGUE")
  
# leftside <- rep ("ALTRUSITC",n_blocks )
# rightside <-rep ("SELFISH",n_blocks ) 
# b00_m_selfish <- ggplot(df, aes(y = MAKER_SELF, x = ID.Study, color = ID.Study))+
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   labs(x="", y="") +
#   coord_flip() + 
#   theme_minimal() + 
#   easy_remove_legend() + 
#   guides(
#   y = guide_axis_manual(
#   #breaks = vals, 
#   labels = leftside  
#   ),
#   y.sec = guide_axis_manual(
#   #breaks = vals, 
#   labels = rightside
# )) + 
#   labs(title  = "MAKER-SELFISH")

# 
# leftside <- rep ("Does NOT",n_blocks )
# rightside <-rep ("DOES",n_blocks ) 
# b00_m_align <- ggplot(df, aes(y = MAKER_ALIGN, x = ID.Study, color = ID.Study))+
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   labs(x="", y="") +
#   coord_flip() + 
#   theme_minimal() + 
#   easy_remove_legend() + 
#   guides(
#   y = guide_axis_manual(
#   #breaks = vals, 
#   labels = leftside  
#   ),
#   y.sec = guide_axis_manual(
#   #breaks = vals, 
#   labels = rightside
# )) + 
#   labs(title  = "MAKER-SHARES-MY-VALUES")

# leftside <- rep ("UNTRUSTWORTHY",n_blocks )
# rightside <-rep ("TRUSTWORTHY",n_blocks ) 
# b00_m_trust <- ggplot(df, aes(y = MAKER_TRUST, x = ID.Study, color = ID.Study))+
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   labs(x="", y="") +
#   coord_flip() + 
#   theme_minimal() + 
#   # easy_remove_legend() +
#   guides(
#   y = guide_axis_manual(
#   #breaks = vals, 
#   labels = leftside  
#   ),
#   y.sec = guide_axis_manual(
#   #breaks = vals, 
#   labels = rightside
# )) + 
#   labs(title  = "MAKER-TRUST")
# 
# PLOT_b0_maker <- (b00_m_design / b00_m_data / b00_m_politics / b00_m_argue / b00_m_selfish / b00_m_align / b00_m_trust) + 
#   plot_annotation(
#     title = "STIMULUS B0 — MILLENIAL PINK PLANTS",
#     subtitle =""
#   )
# 
# PLOT_b0_maker
###################################################

12.0.6 single Block

# #FILTER DATAFRAME
# df <- df_graphs %>% filter(STIMULUS == "B0-0")
# 
# ######### MAKER ATTRIBUTES ########################
# vals = c("datacollar", "bluecollar")
# leftside <- rep ("PROFESSIONAL",1 )
# rightside <-rep ("LAYPERSON",1 ) 
# b_m_design <- ggplot(df, aes(y = MAKER_DESIGN, x = ""))+
#   geom_boxplot(width = 0.5) +
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   labs(x="", y="") +
#   coord_flip() + 
#   theme_minimal() + 
#   easy_remove_legend() + 
#   guides(
#   y = guide_axis_manual(
#   #breaks = vals, 
#   labels = leftside  
#   ),
#   y.sec = guide_axis_manual(
#   #breaks = vals, 
#   labels = rightside
# )) + 
#   labs(title  = "MAKER-DESIGN")
# 
# b_m_data <- ggplot(df, aes(y = MAKER_DATA, x = ""))+
#   geom_boxplot(width = 0.5) +
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   labs(x="", y="") +
#   coord_flip() + 
#   theme_minimal() + 
#   easy_remove_legend() + 
#   guides(
#   y = guide_axis_manual(
#   #breaks = vals, 
#   labels = leftside  
#   ),
#   y.sec = guide_axis_manual(
#   #breaks = vals, 
#   labels = rightside
# )) + 
#   labs(title  = "MAKER-DATA")
# 
# 
# leftside <- rep ("LEFT-WING",1 )
# rightside <-rep ("RIGHT-WING",1 ) 
# b_m_politics <- ggplot(df, aes(y = MAKER_POLITIC, x = ""))+
#   geom_boxplot(width = 0.5) +
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   labs(x="", y="") +
#   coord_flip() + 
#   theme_minimal() + 
#   easy_remove_legend() + 
#   guides(
#   y = guide_axis_manual(
#   #breaks = vals, 
#   labels = leftside  
#   ),
#   y.sec = guide_axis_manual(
#   #breaks = vals, 
#   labels = rightside
# )) + 
#   labs(title  = "MAKER-POLITICS")
#   
# 
# leftside <- rep ("CONFRONTATIONAL",1 )
# rightside <-rep ("DIPLOMATIC",1 ) 
# b_m_argue <- ggplot(df, aes(y = MAKER_ARGUE, x = ""))+
#   geom_boxplot(width = 0.5) +
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   labs(x="", y="") +
#   coord_flip() + 
#   theme_minimal() + 
#   easy_remove_legend() + 
#   guides(
#   y = guide_axis_manual(
#   #breaks = vals, 
#   labels = leftside  
#   ),
#   y.sec = guide_axis_manual(
#   #breaks = vals, 
#   labels = rightside
# )) + 
#   labs(title  = "MAKER-ARGUE")
#   
# leftside <- rep ("ALTRUSITC",1 )
# rightside <-rep ("SELFISH",1 ) 
# b_m_selfish <- ggplot(df, aes(y = MAKER_SELF, x = ""))+
#   geom_boxplot(width = 0.5) +
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   labs(x="", y="") +
#   coord_flip() + 
#   theme_minimal() + 
#   easy_remove_legend() + 
#   guides(
#   y = guide_axis_manual(
#   #breaks = vals, 
#   labels = leftside  
#   ),
#   y.sec = guide_axis_manual(
#   #breaks = vals, 
#   labels = rightside
# )) + 
#   labs(title  = "MAKER-SELFISH")
# 
# 
# leftside <- rep ("Does NOT",1 )
# rightside <-rep ("DOES",1 ) 
# b_m_align <- ggplot(df, aes(y = MAKER_ALIGN, x = ""))+
#   geom_boxplot(width = 0.5) +
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   labs(x="", y="") +
#   coord_flip() + 
#   theme_minimal() + 
#   easy_remove_legend() + 
#   guides(
#   y = guide_axis_manual(
#   #breaks = vals, 
#   labels = leftside  
#   ),
#   y.sec = guide_axis_manual(
#   #breaks = vals, 
#   labels = rightside
# )) + 
#   labs(title  = "MAKER-SHARES-MY-VALUES")
# 
# leftside <- rep ("UNTRUSTWORTHY",1 )
# rightside <-rep ("TRUSTWORTHY",1 ) 
# b_m_trust <- ggplot(df, aes(y = MAKER_TRUST, x = ""))+
#   geom_boxplot(width = 0.5) +
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   labs(x="", y="") +
#   coord_flip() + 
#   theme_minimal() + 
#   easy_remove_legend() + 
#   guides(
#   y = guide_axis_manual(
#   #breaks = vals, 
#   labels = leftside  
#   ),
#   y.sec = guide_axis_manual(
#   #breaks = vals, 
#   labels = rightside
# )) + 
#   labs(title  = "MAKER-TRUST")
# 
# 
# PLOT_b_maker <- (b_m_design / b_m_data / b_m_politics / b_m_argue / b_m_selfish / b_m_align / b_m_trust) + 
#   plot_annotation(
#     title = paste(unique(df$STIMULUS), "MAKER")
#   )
# 
# PLOT_b_maker
###################################################

12.0.7 single Block grouped

# 
# #FILTER DATAFRAME
# df <- df_graphs %>% filter(STIMULUS == "B0-0")
# 
# ######### MAKER ATTRIBUTES ########################
# vals = c("datacollar", "bluecollar")
# leftside <- rep ("PROFESSIONAL",1 )
# rightside <-rep ("LAYPERSON",1 ) 
# b_m_design <- ggplot(df, aes(y = MAKER_DESIGN, x = ID.Study, color = ID.Study))+
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   labs(x="", y="") +
#   coord_flip() + 
#   theme_minimal() + 
#   easy_remove_legend() + 
#   guides(
#   y = guide_axis_manual(
#   #breaks = vals, 
#   labels = leftside  
#   ),
#   y.sec = guide_axis_manual(
#   #breaks = vals, 
#   labels = rightside
# )) + 
#   labs(title  = "MAKER-DESIGN")
# 
# b_m_data <- ggplot(df, aes(y = MAKER_DATA, x = ID.Study, color = ID.Study))+
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   labs(x="", y="") +
#   coord_flip() + 
#   theme_minimal() + 
#   easy_remove_legend() + 
#   guides(
#   y = guide_axis_manual(
#   #breaks = vals, 
#   labels = leftside  
#   ),
#   y.sec = guide_axis_manual(
#   #breaks = vals, 
#   labels = rightside
# )) + 
#   labs(title  = "MAKER-DATA")
# 
# 
# leftside <- rep ("LEFT-WING",1 )
# rightside <-rep ("RIGHT-WING",1 ) 
# b_m_politics <- ggplot(df, aes(y = MAKER_POLITIC, x = ID.Study, color = ID.Study))+
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   labs(x="", y="") +
#   coord_flip() + 
#   theme_minimal() + 
#   easy_remove_legend() + 
#   guides(
#   y = guide_axis_manual(
#   #breaks = vals, 
#   labels = leftside  
#   ),
#   y.sec = guide_axis_manual(
#   #breaks = vals, 
#   labels = rightside
# )) + 
#   labs(title  = "MAKER-POLITICS")
#   
# 
# leftside <- rep ("CONFRONTATIONAL",1 )
# rightside <-rep ("DIPLOMATIC",1 ) 
# b_m_argue <- ggplot(df, aes(y = MAKER_ARGUE, x = ID.Study, color = ID.Study))+
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   labs(x="", y="") +
#   coord_flip() + 
#   theme_minimal() + 
#   easy_remove_legend() + 
#   guides(
#   y = guide_axis_manual(
#   #breaks = vals, 
#   labels = leftside  
#   ),
#   y.sec = guide_axis_manual(
#   #breaks = vals, 
#   labels = rightside
# )) + 
#   labs(title  = "MAKER-ARGUE")
#   
# leftside <- rep ("ALTRUSITC",1 )
# rightside <-rep ("SELFISH",1 ) 
# b_m_selfish <- ggplot(df, aes(y = MAKER_SELF, x = ID.Study, color = ID.Study))+
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   labs(x="", y="") +
#   coord_flip() + 
#   theme_minimal() + 
#   easy_remove_legend() + 
#   guides(
#   y = guide_axis_manual(
#   #breaks = vals, 
#   labels = leftside  
#   ),
#   y.sec = guide_axis_manual(
#   #breaks = vals, 
#   labels = rightside
# )) + 
#   labs(title  = "MAKER-SELFISH")
# 
# 
# leftside <- rep ("Does NOT",1 )
# rightside <-rep ("DOES",1 ) 
# b_m_align <- ggplot(df, aes(y = MAKER_ALIGN, x = ID.Study, color = ID.Study))+
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   labs(x="", y="") +
#   coord_flip() + 
#   theme_minimal() + 
#   easy_remove_legend() + 
#   guides(
#   y = guide_axis_manual(
#   #breaks = vals, 
#   labels = leftside  
#   ),
#   y.sec = guide_axis_manual(
#   #breaks = vals, 
#   labels = rightside
# )) + 
#   labs(title  = "MAKER-SHARES-MY-VALUES")
# 
# leftside <- rep ("UNTRUSTWORTHY",1 )
# rightside <-rep ("TRUSTWORTHY",1 ) 
# b_m_trust <- ggplot(df, aes(y = MAKER_TRUST, x = ID.Study, color = ID.Study))+
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   labs(x="", y="") +
#   coord_flip() + 
#   theme_minimal() + 
#   easy_remove_legend() + 
#   guides(
#   y = guide_axis_manual(
#   #breaks = vals, 
#   labels = leftside  
#   ),
#   y.sec = guide_axis_manual(
#   #breaks = vals, 
#   labels = rightside
# )) + 
#   labs(title  = "MAKER-TRUST")
# 
# 
# PLOT_b_maker <- (b_m_design / b_m_data / b_m_politics / b_m_argue / b_m_selfish / b_m_align / b_m_trust) + 
#   plot_annotation(
#     title = paste(unique(df$STIMULUS), "MAKER")
#   )
# 
# PLOT_b_maker
# ###################################################

12.0.8 graph matrix

# #### 
# q = "MAKER_DESIGN"
# df <- df_questions %>% filter(QUESTION == q) %>% 
#   mutate(
#     value = as.numeric(value),
#     STIMULUS_CATEGORY = str_remove(STIMULUS,"B.-"),
#     STIMULUS_CATEGORY = factor(STIMULUS_CATEGORY,
#                   levels=c("0","1","2","3","4"),
#                   labels= c("F","A","B","C","D"))
# )
# 
# 
# g <- ggplot(df, aes(y = value, x="", color = STIMULUS_CATEGORY)) +
#     geom_boxplot(width = 0.5)+
#     geom_jitter(width = 0.1, alpha=0.3) + 
#     scale_y_continuous(limits=c(-1,101)) +
#     labs(x="", y="") +
#     coord_flip()  +
#     facet_grid(df$STIMULUS_CATEGORY ~ .) + 
#     labs(title=df$STIMULUS_CATEGORY) + 
#     guides(
#         y = guide_axis_manual(labels = ref_labels[q,"left"]),
#         y.sec = guide_axis_manual(labels = ref_labels[q,"right"])
#       ) +  
#   theme_minimal()  + 
#   labs (title = q)
# 
# g
# left <- rep(ref_labels[q,"left"], length(unique(df$STIMULUS_CATEGORY)))
# right <- rep(ref_labels[q,"left"], length(unique(df$STIMULUS_CATEGORY)))
# a <- multi_sd(df, x = value, y = QUESTION, color = STIMULUS_CATEGORY, left=, right=right)
# 
# 
# multi_sd <- function (df, x, y, color, left,right) { 
#   
#   g <- ggplot(df, aes(y = {{x}}, x = {{y}}, color = {{color}}))+
#   geom_boxplot(width = 0.5) + 
#   geom_jitter(width = 0.1, alpha=0.5) + 
#   scale_y_continuous(limits=c(-1,101)) +
#   facet_grid(rows=vars({{color}})) +
#   labs(x="", y="") +
#   coord_flip() + 
#   guides(
#     y = guide_axis_manual(labels = left),
#     y.sec = guide_axis_manual(labels = right)
#   ) + theme_minimal() 
#   
#   return(g)
# }




# ## GROUPED PLOTSD FUNCTION 
# grouped_plotsd = function (data, x, type, q, boxplot) {
#   ggplot(df, aes(y = .data[[x]], x="")) +
#     {if(boxplot) geom_boxplot(width = 0.5) } +
#     geom_jitter(width = 0.1, alpha=0.3) + 
#     scale_y_continuous(limits=c(-1,101)) +
#     labs(x="", y="") +
#     coord_flip()  +
#     {if(type == "Q")
#       guides(
#         y = guide_axis_manual(labels = ref_labels[q,"left"]),
#         y.sec = guide_axis_manual(labels = ref_labels[q,"right"])
#       )} +  
#   theme_minimal()  + 
#      labs (
#        caption = column
#      )
# }


#####################################################################

# # stim <- "B1-1"
# 
# # setup dataframe 
# title <- df_stimuli %>% filter(ID ==stim) %>% select(NAME) 
# title <- paste(stim,"|",title)
# df <- df_graphs %>% filter(STIMULUS==stim)
# 
# #subset data cols 
# d <- df %>% select( all_of(questions))
# plots <- as.list(lapply(colnames(d), plotsd, data = df, boxplot=TRUE))
# 
# ## PICK UP HERE, FIGURE OUT HOW TO LOOP OVER STIM
# 
# x <- plots[[1]] / plots[[2]] / plots[[3]] / plots[[4]] / plots[[5]] / plots[[6]] / plots[[7]] /
#   plots[[8]] /plots[[9]] /plots[[10]] /plots[[11]] + 
#   plot_annotation(
#     title = title,
#     subtitle =""
#   )
# x
# ggsave(plot = x, filename =paste(stim,".png"), units = c("in"), width = 10, height = 14  )




#CREATE MATRIX 
# m <- matrix(nrow = length(stim), ncol = length(questions)) 
# rownames(m) <- stim
# colnames(m) <- questions

##END OF FILE